Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
3212532
feat: gator bench
sozercan Dec 5, 2025
63673b5
concurrency
sozercan Dec 5, 2025
d6ba244
best practices
sozercan Dec 5, 2025
4b27eef
scripts doc
sozercan Dec 5, 2025
40cd80d
remove matrix from gator test
sozercan Dec 5, 2025
694f2b5
fix: add min-threshold to baseline comparison CI test
sozercan Dec 5, 2025
2c31036
docs: use 'us' instead of 'µs' for microseconds in examples
sozercan Dec 5, 2025
c9fbc95
refactor: address PR review comments
sozercan Dec 5, 2025
1e67bb4
refactor: use errors.Is instead of string parsing in isEngineIncompat…
sozercan Dec 5, 2025
edea319
add note
sozercan Dec 5, 2025
01fa9ac
warning for cel referential
sozercan Dec 5, 2025
d09857c
consts for yaml
sozercan Dec 5, 2025
7be4699
throughput variance
sozercan Dec 6, 2025
93b4793
address review
sozercan Dec 9, 2025
6f57099
Merge branch 'master' into gator-bench
sozercan Dec 9, 2025
5ad1a6f
lint
sozercan Dec 9, 2025
7fbbfe9
fix test
sozercan Dec 10, 2025
8a1d295
Merge branch 'master' into gator-bench
sozercan Dec 10, 2025
b42973f
address ci variance
sozercan Dec 10, 2025
4a1ecbf
Merge branch 'gator-bench' of github.com:sozercan/gatekeeper into gat…
sozercan Dec 10, 2025
dfd9082
Merge branch 'master' into gator-bench
JaydipGabani Jan 22, 2026
21fda0f
Merge branch 'master' into gator-bench
JaydipGabani Jan 22, 2026
c00eb27
fixing go mod
JaydipGabani Jan 22, 2026
b5113a0
updating docs and adding --stats for gator bench
JaydipGabani Jan 23, 2026
34140f8
fixing versions in workflow file
JaydipGabani Jan 23, 2026
92cbcbc
adding stats support for json output
JaydipGabani Jan 23, 2026
5d2473b
addressing copilot feedback
JaydipGabani Jan 27, 2026
fd19f01
Merge branch 'master' into gator-bench
sozercan Jan 30, 2026
3daec36
removing chatbot.md, updating docs, adding context cancellations in g…
JaydipGabani Feb 3, 2026
7e93e05
Merge branch 'master' into gator-bench
JaydipGabani Feb 3, 2026
df0ff25
updating go.mod to fix CI
JaydipGabani Feb 3, 2026
d57ab59
Merge branch 'master' into gator-bench
JaydipGabani Feb 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 99 additions & 10 deletions .github/workflows/test-gator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ jobs:
name: "Test Gator"
runs-on: ubuntu-22.04
timeout-minutes: 5
strategy:
fail-fast: false
matrix:
KUBERNETES_VERSION: ["1.31.6", "1.32.3", "1.33.2"] # Latest available versions of Kubernetes at - https://hub.docker.com/r/kindest/node/tags
steps:
- name: Harden Runner
uses: step-security/harden-runner@95d9a5deda9de15063e7595e9719c11c38c90ae2 # v2.13.2
Expand All @@ -49,11 +45,104 @@ jobs:
go-version: "1.25"
check-latest: true

- name: Download e2e dependencies
run: |
mkdir -p $GITHUB_WORKSPACE/bin
echo "$GITHUB_WORKSPACE/bin" >> $GITHUB_PATH
make e2e-dependencies KUBERNETES_VERSION=${{ matrix.KUBERNETES_VERSION }}

- name: gator test
run: make test-gator-containerized

gator_bench_test:
name: "Gator Bench E2E"
runs-on: ubuntu-22.04
timeout-minutes: 10
steps:
- name: Harden Runner
uses: step-security/harden-runner@95d9a5deda9de15063e7595e9719c11c38c90ae2 # v2.13.2
with:
egress-policy: audit

- name: Check out code into the Go module directory
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

- name: Set up Go
uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
with:
go-version: "1.25"
check-latest: true

- name: Build gator
run: make gator

- name: Test basic Rego policy benchmark
run: |
./bin/gator bench \
--filename test/gator/bench/basic/ \
--iterations 50 \
--output table

- name: Test CEL policy benchmark
run: |
./bin/gator bench \
--filename test/gator/bench/cel/ \
--iterations 50 \
--engine cel \
--output table

- name: Test dual-engine policy benchmark
run: |
./bin/gator bench \
--filename test/gator/bench/both/ \
--iterations 50 \
--output table

- name: Test memory profiling
run: |
./bin/gator bench \
--filename test/gator/bench/basic/ \
--iterations 20 \
--memory \
--output table

- name: Test concurrent execution
run: |
./bin/gator bench \
--filename test/gator/bench/basic/ \
--iterations 100 \
--concurrency 4 \
--output table

- name: Test JSON output
run: |
./bin/gator bench \
--filename test/gator/bench/basic/ \
--iterations 20 \
--output json | jq .

- name: Test baseline save and compare
run: |
# Save baseline
./bin/gator bench \
--filename test/gator/bench/basic/ \
--iterations 30 \
--save /tmp/baseline.json

# Compare against baseline (should pass with 50% threshold)
./bin/gator bench \
--filename test/gator/bench/basic/ \
--iterations 30 \
--compare /tmp/baseline.json \
--threshold 50

- name: Test min-threshold
run: |
# Save baseline
./bin/gator bench \
--filename test/gator/bench/basic/ \
--iterations 30 \
--save /tmp/baseline-min.json

# Compare with strict threshold (0.1%) but loose min-threshold (1s)
# This ensures the flag prevents failure from small variations
./bin/gator bench \
--filename test/gator/bench/basic/ \
--iterations 30 \
--compare /tmp/baseline-min.json \
--threshold 0.1 \
--min-threshold 1s
2 changes: 1 addition & 1 deletion .github/workflows/workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -273,4 +273,4 @@ jobs:
with:
name: logs-${{ matrix.KUBERNETES_VERSION }}
path: |
logs-*.json
logs-*.json
240 changes: 240 additions & 0 deletions cmd/gator/bench/bench.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
package bench

import (
"fmt"
"os"
"strings"
"time"

cmdutils "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/util"
"github.com/open-policy-agent/gatekeeper/v3/pkg/gator/bench"
"github.com/spf13/cobra"
)

// examples holds the user-facing --help example text for the bench
// subcommand. Each scenario mirrors a flag combination registered in
// init(); keep the two in sync when flags change. The raw string is
// displayed verbatim by cobra, so it is intentionally unindented.
const (
	examples = `# Benchmark policies with default settings (1000 iterations, rego engine)
gator bench --filename="policies/"

# Benchmark with both Rego and CEL engines
gator bench --filename="policies/" --engine=all

# Benchmark with custom iterations and warmup
gator bench --filename="policies/" --iterations=500 --warmup=50

# Benchmark with concurrent load (simulates real webhook traffic)
gator bench --filename="policies/" --concurrency=10

# Output results as JSON
gator bench --filename="policies/" --output=json

# Benchmark policies from multiple sources
gator bench --filename="templates/" --filename="constraints/" --filename="resources/"

# Benchmark from OCI image
gator bench --image="ghcr.io/example/policies:latest"

# Benchmark with memory profiling
gator bench --filename="policies/" --memory

# Save benchmark results as baseline
gator bench --filename="policies/" --save=baseline.json

# Compare against baseline (fail if >10% regression or >1ms absolute increase)
gator bench --filename="policies/" --compare=baseline.json --threshold=10 --min-threshold=1ms`
)

// Cmd is the cobra command for the bench subcommand. It is registered
// into the root gator command (see cmd/gator/gator.go) and drives the
// benchmark via run(). Args is cobra.NoArgs because all inputs come
// from flags rather than positional arguments.
var Cmd = &cobra.Command{
	Use:   "bench",
	Short: "Benchmark policy evaluation performance",
	Long: `Benchmark evaluates the performance of Gatekeeper policies by running
constraint evaluation against test resources and measuring latency metrics.

This command loads ConstraintTemplates, Constraints, and Kubernetes resources
from the specified files or directories, then repeatedly evaluates the resources
against the constraints to gather performance statistics.

Supports both Rego and CEL policy engines for comparison.`,
	Example: examples,
	Run:     run,
	Args:    cobra.NoArgs,
}

// Flag storage for the bench subcommand. The values are populated by
// cobra during flag parsing (see init) and read by run.
var (
	flagFilenames    []string      // --filename: files/dirs with templates, constraints, and resources
	flagImages       []string      // --image: OCI image URLs containing policies
	flagTempDir      string        // --tempdir: scratch dir for unpacking images
	flagEngine       string        // --engine: rego|cel|all (validated by parseEngine)
	flagIterations   int           // --iterations: measured runs per engine
	flagWarmup       int           // --warmup: unmeasured runs before timing starts
	flagConcurrency  int           // --concurrency: parallel review goroutines
	flagOutput       string        // --output: table|json|yaml
	flagStats        bool          // --stats: gather constraint-framework statistics
	flagMemory       bool          // --memory: track allocations per review
	flagSave         string        // --save: path to write results as a baseline
	flagCompare      string        // --compare: path of a baseline to compare against
	flagThreshold    float64       // --threshold: regression threshold in percent
	flagMinThreshold time.Duration // --min-threshold: minimum absolute latency delta to count as regression
)

// Flag name constants, shared between the registrations in init and
// any programmatic flag lookups; using constants avoids typo drift
// between the definition and usage sites.
const (
	flagNameFilename     = "filename"
	flagNameImage        = "image"
	flagNameTempDir      = "tempdir"
	flagNameEngine       = "engine"
	flagNameIterations   = "iterations"
	flagNameWarmup       = "warmup"
	flagNameConcurrency  = "concurrency"
	flagNameOutput       = "output"
	flagNameStats        = "stats"
	flagNameMemory       = "memory"
	flagNameSave         = "save"
	flagNameCompare      = "compare"
	flagNameThreshold    = "threshold"
	flagNameMinThreshold = "min-threshold"
)

// init registers every bench flag on Cmd. Single-letter shorthands
// (-f, -i, -d, -e, -n, -c, -o) are chosen to match other gator
// subcommands where the same flag exists.
func init() {
	// Input sources: at least one of --filename/--image is required (checked in run).
	Cmd.Flags().StringArrayVarP(&flagFilenames, flagNameFilename, "f", []string{},
		"a file or directory containing ConstraintTemplates, Constraints, and resources to benchmark. Can be specified multiple times.")
	Cmd.Flags().StringArrayVarP(&flagImages, flagNameImage, "i", []string{},
		"a URL to an OCI image containing policies. Can be specified multiple times.")
	Cmd.Flags().StringVarP(&flagTempDir, flagNameTempDir, "d", "",
		"temporary directory to download and unpack images to.")
	// Execution shape: engine, iteration counts, and parallelism.
	Cmd.Flags().StringVarP(&flagEngine, flagNameEngine, "e", string(bench.EngineRego),
		fmt.Sprintf("policy engine to benchmark. One of: %s|%s|%s", bench.EngineRego, bench.EngineCEL, bench.EngineAll))
	Cmd.Flags().IntVarP(&flagIterations, flagNameIterations, "n", 1000,
		"number of benchmark iterations to run. Use at least 1000 for meaningful P99 metrics.")
	Cmd.Flags().IntVar(&flagWarmup, flagNameWarmup, 10,
		"number of warmup iterations before measurement.")
	Cmd.Flags().IntVarP(&flagConcurrency, flagNameConcurrency, "c", 1,
		"number of concurrent goroutines for reviews. Higher values simulate realistic webhook load.")
	// Output and profiling options.
	Cmd.Flags().StringVarP(&flagOutput, flagNameOutput, "o", "table",
		"output format. One of: table|json|yaml")
	Cmd.Flags().BoolVar(&flagStats, flagNameStats, false,
		"gather detailed statistics from the constraint framework.")
	Cmd.Flags().BoolVar(&flagMemory, flagNameMemory, false,
		"enable memory profiling to track allocations per review.")
	// Baseline save/compare options used for CI regression gating.
	Cmd.Flags().StringVar(&flagSave, flagNameSave, "",
		"save benchmark results to this file for future comparison (supports .json and .yaml).")
	Cmd.Flags().StringVar(&flagCompare, flagNameCompare, "",
		"compare results against a baseline file (supports .json and .yaml).")
	Cmd.Flags().Float64Var(&flagThreshold, flagNameThreshold, 10.0,
		"regression threshold percentage for comparison. Exit code 1 if exceeded.")
	Cmd.Flags().DurationVar(&flagMinThreshold, flagNameMinThreshold, 0,
		"minimum absolute latency difference to consider a regression (e.g., 1ms). Prevents false positives on fast policies.")
}

// run executes the bench subcommand: it validates flags, runs the
// benchmark, prints the results to stdout, optionally saves them as a
// baseline, and optionally compares them against a previous baseline.
// It terminates the process itself — via cmdutils.ErrFatalf on any
// error, or os.Exit(1) when a baseline comparison detects a regression.
func run(_ *cobra.Command, _ []string) {
	// Validate engine flag.
	engine, err := parseEngine(flagEngine)
	if err != nil {
		cmdutils.ErrFatalf("invalid engine: %v", err)
	}

	// Validate output format.
	outputFormat, err := bench.ParseOutputFormat(flagOutput)
	if err != nil {
		cmdutils.ErrFatalf("invalid output format: %v", err)
	}

	// Validate the remaining input/numeric flags.
	validateFlags()

	// Run benchmark. Progress is written to stderr so that stdout
	// stays clean for the formatted results (e.g. `--output json | jq`).
	opts := &bench.Opts{
		Filenames:    flagFilenames,
		Images:       flagImages,
		TempDir:      flagTempDir,
		Engine:       engine,
		Iterations:   flagIterations,
		Warmup:       flagWarmup,
		Concurrency:  flagConcurrency,
		GatherStats:  flagStats,
		Memory:       flagMemory,
		Save:         flagSave,
		Baseline:     flagCompare,
		Threshold:    flagThreshold,
		MinThreshold: flagMinThreshold,
		Writer:       os.Stderr,
	}

	results, err := bench.Run(opts)
	if err != nil {
		cmdutils.ErrFatalf("benchmark failed: %v", err)
	}

	// Format and print results to stdout.
	output, err := bench.FormatResults(results, outputFormat)
	if err != nil {
		cmdutils.ErrFatalf("formatting results: %v", err)
	}
	fmt.Print(output)

	// Save results if requested, so a later run can --compare against them.
	if flagSave != "" {
		if err := bench.SaveResults(results, flagSave); err != nil {
			cmdutils.ErrFatalf("saving results: %v", err)
		}
		fmt.Fprintf(os.Stderr, "\nResults saved to: %s\n", flagSave)
	}

	// Compare against baseline if requested; a regression beyond the
	// configured thresholds produces exit code 1 for CI gating.
	exitCode := 0
	if flagCompare != "" {
		baseline, err := bench.LoadBaseline(flagCompare)
		if err != nil {
			cmdutils.ErrFatalf("loading baseline: %v", err)
		}

		comparisons := bench.Compare(baseline, results, flagThreshold, flagMinThreshold)
		if len(comparisons) == 0 {
			fmt.Fprintf(os.Stderr, "\nWarning: No matching engines found for comparison\n")
		} else {
			fmt.Println()
			fmt.Print(bench.FormatComparison(comparisons, flagThreshold))

			// Any single failed comparison fails the whole run.
			for _, comp := range comparisons {
				if !comp.Passed {
					exitCode = 1
					break
				}
			}
		}
	}

	os.Exit(exitCode)
}

// validateFlags checks the input-source and numeric flags that cobra
// cannot validate on its own, exiting via cmdutils.ErrFatalf on the
// first violation.
func validateFlags() {
	if len(flagFilenames) == 0 && len(flagImages) == 0 {
		cmdutils.ErrFatalf("at least one --filename or --image must be specified")
	}
	if flagIterations <= 0 {
		cmdutils.ErrFatalf("iterations must be positive")
	}
	if flagWarmup < 0 {
		cmdutils.ErrFatalf("warmup must be non-negative")
	}
	if flagThreshold < 0 {
		cmdutils.ErrFatalf("threshold must be non-negative")
	}
	if flagConcurrency < 1 {
		cmdutils.ErrFatalf("concurrency must be at least 1")
	}
}

// parseEngine maps the user-supplied --engine value onto one of the
// supported bench.Engine constants, matching case-insensitively. It
// returns a descriptive error listing the valid choices when the value
// is not recognized.
func parseEngine(s string) (bench.Engine, error) {
	normalized := strings.ToLower(s)
	for _, candidate := range []bench.Engine{bench.EngineRego, bench.EngineCEL, bench.EngineAll} {
		if normalized == string(candidate) {
			return candidate, nil
		}
	}
	return "", fmt.Errorf("invalid engine %q (valid: %s, %s, %s)", s, bench.EngineRego, bench.EngineCEL, bench.EngineAll)
}
2 changes: 2 additions & 0 deletions cmd/gator/gator.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"os"

"github.com/open-policy-agent/gatekeeper/v3/cmd/gator/bench"
"github.com/open-policy-agent/gatekeeper/v3/cmd/gator/expand"
"github.com/open-policy-agent/gatekeeper/v3/cmd/gator/sync"
"github.com/open-policy-agent/gatekeeper/v3/cmd/gator/test"
Expand All @@ -17,6 +18,7 @@ var commands = []*cobra.Command{
test.Cmd,
expand.Cmd,
sync.Cmd,
bench.Cmd,
k8sVersion.WithFont("alligator2"),
}

Expand Down
Loading
Loading