diff --git a/.github/workflows/test-gator.yaml b/.github/workflows/test-gator.yaml index 7de809cccd0..a79e9756515 100644 --- a/.github/workflows/test-gator.yaml +++ b/.github/workflows/test-gator.yaml @@ -30,10 +30,6 @@ jobs: name: "Test Gator" runs-on: ubuntu-22.04 timeout-minutes: 5 - strategy: - fail-fast: false - matrix: - KUBERNETES_VERSION: ["1.31.6", "1.32.3", "1.33.2"] # Latest available versions of Kubernetes at - https://hub.docker.com/r/kindest/node/tags steps: - name: Harden Runner uses: step-security/harden-runner@e3f713f2d8f53843e71c69a996d56f51aa9adfb9 # v2.14.1 @@ -49,11 +45,115 @@ jobs: go-version: "1.25" check-latest: true - - name: Download e2e dependencies - run: | - mkdir -p $GITHUB_WORKSPACE/bin - echo "$GITHUB_WORKSPACE/bin" >> $GITHUB_PATH - make e2e-dependencies KUBERNETES_VERSION=${{ matrix.KUBERNETES_VERSION }} - - name: gator test run: make test-gator-containerized + + gator_bench_test: + name: "Gator Bench E2E" + runs-on: ubuntu-22.04 + timeout-minutes: 10 + steps: + - name: Harden Runner + uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 + with: + egress-policy: audit + + - name: Check out code into the Go module directory + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Set up Go + uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0 + with: + go-version: "1.25" + check-latest: true + + - name: Build gator + run: make gator + + - name: Test basic Rego policy benchmark + run: | + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 50 \ + --engine rego \ + --output table + + - name: Test CEL policy benchmark + run: | + ./bin/gator bench \ + --filename test/gator/bench/cel/ \ + --iterations 50 \ + --engine cel \ + --output table + + - name: Test dual-engine policy benchmark + run: | + ./bin/gator bench \ + --filename test/gator/bench/both/ \ + --iterations 50 \ + --engine all \ + --output table + + - name: Test memory profiling + run: | + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 20 \ + --engine rego \ + --memory \ + --output table + + - name: Test concurrent execution + run: | + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 100 \ + --engine rego \ + --concurrency 4 \ + --output table + + - name: Test JSON output + run: | + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 20 \ + --engine rego \ + --output json | jq . 
+ + - name: Test baseline save and compare + run: | + # Save baseline + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 30 \ + --engine rego \ + --save /tmp/baseline.json + + # Compare against baseline - using high min-threshold since we're testing + # functionality not actual performance values in CI + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 30 \ + --engine rego \ + --compare /tmp/baseline.json \ + --threshold 50 \ + --min-threshold 100ms + + - name: Test min-threshold + run: | + # Save baseline + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 30 \ + --engine rego \ + --save /tmp/baseline-min.json + + # Compare with strict threshold (0.1%) but loose min-threshold (1s) + # This ensures the flag prevents failure from small variations + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 30 \ + --engine rego \ + --compare /tmp/baseline-min.json \ + --threshold 0.1 \ + --min-threshold 1s diff --git a/.github/workflows/workflow.yaml b/.github/workflows/workflow.yaml index a8d0f9cfb7d..3b4db9da244 100644 --- a/.github/workflows/workflow.yaml +++ b/.github/workflows/workflow.yaml @@ -271,6 +271,6 @@ jobs: uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 if: ${{ always() }} with: - name: logs-${{ matrix.KUBERNETES_VERSION }} + name: logs-ownerreferences-admission-plugin path: | - logs-*.json \ No newline at end of file + logs-*.json diff --git a/cmd/gator/bench/bench.go b/cmd/gator/bench/bench.go new file mode 100644 index 00000000000..1cfe3d5628a --- /dev/null +++ b/cmd/gator/bench/bench.go @@ -0,0 +1,259 @@ +package bench + +import ( + "fmt" + "os" + "strings" + "time" + + cmdutils "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/util" + "github.com/open-policy-agent/gatekeeper/v3/pkg/gator/bench" + "github.com/spf13/cobra" +) + +const ( + examples = `# Benchmark policies with default settings (1000 iterations, cel engine) +gator bench --filename="policies/" + +# Benchmark with both Rego and CEL engines +gator bench --filename="policies/" --engine=all + +# Benchmark with custom iterations and warmup +gator bench --filename="policies/" --iterations=500 --warmup=50 + +# Benchmark with concurrent load (simulates real webhook traffic) +gator bench --filename="policies/" --concurrency=10 + +# Output results as JSON +gator bench --filename="policies/" --output=json + +# Benchmark policies from multiple sources +gator bench --filename="templates/" --filename="constraints/" --filename="resources/" + +# Benchmark from OCI image +gator bench --image="ghcr.io/example/policies:latest" + +# Benchmark with memory profiling +gator bench --filename="policies/" --memory + +# Save benchmark results as baseline +gator bench --filename="policies/" --save=baseline.json + +# Compare against baseline (fail only if BOTH >10% regression AND >1ms absolute increase) +# This prevents false positives for fast policies where small absolute changes appear as large percentages +gator bench --filename="policies/" --compare=baseline.json --threshold=10 --min-threshold=1ms` +) + +// Cmd is the cobra command for the bench subcommand. +var Cmd = &cobra.Command{ + Use: "bench", + Short: "Benchmark policy evaluation performance", + Long: `Benchmark evaluates the performance of Gatekeeper policies by running +constraint evaluation against test resources and measuring latency metrics. 
+ +This command loads ConstraintTemplates, Constraints, and Kubernetes resources +from the specified files or directories, then repeatedly evaluates the resources +against the constraints to gather performance statistics. + +Supports both Rego and CEL policy engines for comparison.`, + Example: examples, + Run: run, + Args: cobra.NoArgs, +} + +var ( + flagFilenames []string + flagImages []string + flagTempDir string + flagEngine string + flagIterations int + flagWarmup int + flagConcurrency int + flagOutput string + flagStats bool + flagMemory bool + flagSave string + flagCompare string + flagThreshold float64 + flagMinThreshold time.Duration +) + +const ( + flagNameFilename = "filename" + flagNameImage = "image" + flagNameTempDir = "tempdir" + flagNameEngine = "engine" + flagNameIterations = "iterations" + flagNameWarmup = "warmup" + flagNameConcurrency = "concurrency" + flagNameOutput = "output" + flagNameStats = "stats" + flagNameMemory = "memory" + flagNameSave = "save" + flagNameCompare = "compare" + flagNameThreshold = "threshold" + flagNameMinThreshold = "min-threshold" +) + +func init() { + Cmd.Flags().StringArrayVarP(&flagFilenames, flagNameFilename, "f", []string{}, + "a file or directory containing ConstraintTemplates, Constraints, and resources to benchmark. Can be specified multiple times.") + Cmd.Flags().StringArrayVarP(&flagImages, flagNameImage, "i", []string{}, + "a URL to an OCI image containing policies. Can be specified multiple times.") + Cmd.Flags().StringVarP(&flagTempDir, flagNameTempDir, "d", "", + "temporary directory to download and unpack images to.") + Cmd.Flags().StringVarP(&flagEngine, flagNameEngine, "e", string(bench.EngineCEL), + fmt.Sprintf("policy engine to benchmark. One of: %s|%s|%s", bench.EngineRego, bench.EngineCEL, bench.EngineAll)) + Cmd.Flags().IntVarP(&flagIterations, flagNameIterations, "n", 1000, + "number of benchmark iterations to run. Use at least 1000 for meaningful P99 metrics.") + Cmd.Flags().IntVar(&flagWarmup, flagNameWarmup, 10, + "number of warmup iterations before measurement.") + Cmd.Flags().IntVarP(&flagConcurrency, flagNameConcurrency, "c", 1, + "number of concurrent goroutines for reviews. Higher values simulate realistic webhook load.") + Cmd.Flags().StringVarP(&flagOutput, flagNameOutput, "o", "table", + "output format. One of: table|json|yaml") + Cmd.Flags().BoolVar(&flagStats, flagNameStats, false, + "gather detailed statistics from the constraint framework.") + Cmd.Flags().BoolVar(&flagMemory, flagNameMemory, false, + "enable memory profiling to track allocations per review.") + Cmd.Flags().StringVar(&flagSave, flagNameSave, "", + "save benchmark results to this file for future comparison (supports .json and .yaml).") + Cmd.Flags().StringVar(&flagCompare, flagNameCompare, "", + "compare results against a baseline file (supports .json and .yaml).") + Cmd.Flags().Float64Var(&flagThreshold, flagNameThreshold, 10.0, + "regression threshold percentage for comparison. Exit code 1 if exceeded.") + Cmd.Flags().DurationVar(&flagMinThreshold, flagNameMinThreshold, 0, + "minimum absolute latency difference to consider a regression (e.g., 1ms). 
Prevents false positives on fast policies where small absolute changes appear as large percentages.") +} + +func run(_ *cobra.Command, _ []string) { + // Validate engine flag + engine, err := parseEngine(flagEngine) + if err != nil { + cmdutils.ErrFatalf("invalid engine: %v", err) + } + + // Validate output format + outputFormat, err := bench.ParseOutputFormat(flagOutput) + if err != nil { + cmdutils.ErrFatalf("invalid output format: %v", err) + } + + // Validate inputs + if len(flagFilenames) == 0 && len(flagImages) == 0 { + cmdutils.ErrFatalf("at least one --filename or --image must be specified") + } + + if flagIterations <= 0 { + cmdutils.ErrFatalf("iterations must be positive") + } + + if flagWarmup < 0 { + cmdutils.ErrFatalf("warmup must be non-negative") + } + + if flagThreshold < 0 { + cmdutils.ErrFatalf("threshold must be non-negative") + } + + if flagMinThreshold < 0 { + cmdutils.ErrFatalf("min-threshold must be non-negative") + } + + if flagConcurrency < 1 { + cmdutils.ErrFatalf("concurrency must be at least 1") + } + + // Warn if warmup exceeds iterations (likely user error) + if flagWarmup > flagIterations { + fmt.Fprintf(os.Stderr, "Warning: warmup (%d) exceeds iterations (%d). Consider reducing warmup.\n\n", flagWarmup, flagIterations) + } + + // Validate baseline file exists before running expensive benchmark + if flagCompare != "" { + if _, err := os.Stat(flagCompare); os.IsNotExist(err) { + cmdutils.ErrFatalf("baseline file does not exist: %s", flagCompare) + } else if err != nil { + cmdutils.ErrFatalf("cannot access baseline file: %v", err) + } + } + + // Run benchmark + opts := &bench.Opts{ + Filenames: flagFilenames, + Images: flagImages, + TempDir: flagTempDir, + Engine: engine, + Iterations: flagIterations, + Warmup: flagWarmup, + Concurrency: flagConcurrency, + GatherStats: flagStats, + Memory: flagMemory, + Save: flagSave, + Baseline: flagCompare, + Threshold: flagThreshold, + MinThreshold: flagMinThreshold, + Writer: os.Stderr, + } + + results, err := bench.Run(opts) + if err != nil { + cmdutils.ErrFatalf("benchmark failed: %v", err) + } + + // Format and print results + output, err := bench.FormatResults(results, outputFormat) + if err != nil { + cmdutils.ErrFatalf("formatting results: %v", err) + } + + fmt.Print(output) + + // Save results if requested + if flagSave != "" { + if err := bench.SaveResults(results, flagSave); err != nil { + cmdutils.ErrFatalf("saving results: %v", err) + } + fmt.Fprintf(os.Stderr, "\nResults saved to: %s\n", flagSave) + } + + // Compare against baseline if requested + exitCode := 0 + if flagCompare != "" { + baseline, err := bench.LoadBaseline(flagCompare) + if err != nil { + cmdutils.ErrFatalf("loading baseline: %v", err) + } + + comparisons := bench.Compare(baseline, results, flagThreshold, flagMinThreshold) + if len(comparisons) == 0 { + fmt.Fprintf(os.Stderr, "\nWarning: No matching engines found for comparison\n") + } else { + fmt.Println() + fmt.Print(bench.FormatComparison(comparisons, flagThreshold)) + + // Check if any comparison failed + for _, comp := range comparisons { + if !comp.Passed { + exitCode = 1 + break + } + } + } + } + + os.Exit(exitCode) +} + +func parseEngine(s string) (bench.Engine, error) { + switch strings.ToLower(s) { + case string(bench.EngineRego): + return bench.EngineRego, nil + case string(bench.EngineCEL): + return bench.EngineCEL, nil + case string(bench.EngineAll): + return bench.EngineAll, nil + default: + return "", fmt.Errorf("invalid engine %q (valid: %s, %s, %s)", s, bench.EngineRego, 
bench.EngineCEL, bench.EngineAll) + } +} diff --git a/cmd/gator/gator.go b/cmd/gator/gator.go index cd0c57e363e..3c5af9e8542 100644 --- a/cmd/gator/gator.go +++ b/cmd/gator/gator.go @@ -3,6 +3,7 @@ package main import ( "os" + "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/bench" "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/expand" "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/sync" "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/test" @@ -17,6 +18,7 @@ var commands = []*cobra.Command{ test.Cmd, expand.Cmd, sync.Cmd, + bench.Cmd, k8sVersion.WithFont("alligator2"), } diff --git a/go.mod b/go.mod index 443151e8892..dd48ad45d0b 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,7 @@ require ( golang.org/x/time v0.14.0 google.golang.org/grpc v1.77.0 google.golang.org/protobuf v1.36.11 + gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.35.0 k8s.io/apiextensions-apiserver v0.35.0 k8s.io/apimachinery v0.35.0 @@ -163,7 +164,6 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/component-base v0.35.0 // indirect k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect diff --git a/pkg/gator/bench/bench.go b/pkg/gator/bench/bench.go new file mode 100644 index 00000000000..35c2b1cb510 --- /dev/null +++ b/pkg/gator/bench/bench.go @@ -0,0 +1,513 @@ +package bench + +import ( + "context" + "errors" + "fmt" + "runtime" + "sync" + "sync/atomic" + "time" + + "github.com/open-policy-agent/frameworks/constraint/pkg/apis" + constraintclient "github.com/open-policy-agent/frameworks/constraint/pkg/client" + "github.com/open-policy-agent/frameworks/constraint/pkg/client/drivers/rego" + clienterrors "github.com/open-policy-agent/frameworks/constraint/pkg/client/errors" + "github.com/open-policy-agent/frameworks/constraint/pkg/client/reviews" + "github.com/open-policy-agent/frameworks/constraint/pkg/instrumentation" + "github.com/open-policy-agent/gatekeeper/v3/pkg/drivers/k8scel" + "github.com/open-policy-agent/gatekeeper/v3/pkg/gator/reader" + mutationtypes "github.com/open-policy-agent/gatekeeper/v3/pkg/mutation/types" + "github.com/open-policy-agent/gatekeeper/v3/pkg/target" + "github.com/open-policy-agent/gatekeeper/v3/pkg/util" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + k8sruntime "k8s.io/apimachinery/pkg/runtime" +) + +const ( + // MinIterationsForP99 is the minimum number of iterations recommended for + // statistically meaningful P99 metrics. + MinIterationsForP99 = 1000 +) + +var scheme *k8sruntime.Scheme + +func init() { + scheme = k8sruntime.NewScheme() + if err := apis.AddToScheme(scheme); err != nil { + panic(err) + } +} + +// Run executes the benchmark with the given options and returns results +// for each engine tested. +func Run(opts *Opts) ([]Results, error) { + // Warn if iterations are too low for meaningful P99 statistics + if opts.Iterations < MinIterationsForP99 && opts.Writer != nil { + fmt.Fprintf(opts.Writer, "Warning: %d iterations may not provide statistically meaningful P99 metrics. 
Consider using at least %d iterations.\n\n", + opts.Iterations, MinIterationsForP99) + } + + // Default concurrency to 1 (sequential) + if opts.Concurrency < 1 { + opts.Concurrency = 1 + } + + // Read all resources from files/images + objs, err := reader.ReadSources(opts.Filenames, opts.Images, opts.TempDir) + if err != nil { + return nil, fmt.Errorf("reading sources: %w", err) + } + if len(objs) == 0 { + return nil, fmt.Errorf("no input data identified") + } + + // Categorize objects + var templates []*unstructured.Unstructured + var constraints []*unstructured.Unstructured + var reviewObjs []*unstructured.Unstructured + + for _, obj := range objs { + switch { + case reader.IsTemplate(obj): + templates = append(templates, obj) + case reader.IsConstraint(obj): + constraints = append(constraints, obj) + default: + // Everything else is a potential review object + reviewObjs = append(reviewObjs, obj) + } + } + + if len(templates) == 0 { + return nil, fmt.Errorf("no ConstraintTemplates found in input") + } + if len(constraints) == 0 { + return nil, fmt.Errorf("no Constraints found in input") + } + if len(reviewObjs) == 0 { + return nil, fmt.Errorf("no objects to review found in input") + } + + var results []Results + var warnings []string + + // Determine which engines to benchmark + engines := []Engine{opts.Engine} + if opts.Engine == EngineAll { + engines = []Engine{EngineRego, EngineCEL} + } + + for _, engine := range engines { + result, err := runBenchmark(engine, templates, constraints, reviewObjs, opts) + if err != nil { + // For "all" engine mode, record warning and continue with other engines + if opts.Engine == EngineAll { + warnings = append(warnings, fmt.Sprintf("%s: %s", engine, err.Error())) + continue + } + return nil, fmt.Errorf("benchmarking %s: %w", engine, err) + } + results = append(results, *result) + } + + // Check if we have any results + if len(results) == 0 { + return nil, fmt.Errorf("no engines could process the templates: %v", warnings) + } + + // Report engines that were skipped to the configured writer for visibility + if len(warnings) > 0 && len(results) > 0 && opts.Writer != nil { + for _, w := range warnings { + fmt.Fprintf(opts.Writer, "Warning: Engine skipped - %s\n", w) + } + fmt.Fprintln(opts.Writer) + } + + return results, nil +} + +// runBenchmark runs the benchmark for a single engine.
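Since Run and Opts are exported, the CLI wiring earlier in this patch is only one consumer. A minimal sketch of driving the benchmark programmatically, using the field and constant names defined here (the policies/ directory is illustrative):

package main

import (
	"fmt"
	"os"

	"github.com/open-policy-agent/gatekeeper/v3/pkg/gator/bench"
)

func main() {
	// Benchmark both engines over a directory of templates, constraints, and objects.
	results, err := bench.Run(&bench.Opts{
		Filenames:   []string{"policies/"}, // illustrative path
		Engine:      bench.EngineAll,
		Iterations:  1000,
		Warmup:      10,
		Concurrency: 4,
		Writer:      os.Stderr, // warnings (low iterations, skipped engines) land here
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	for _, r := range results {
		fmt.Printf("%s: p99=%s, %.0f reviews/s, %d violations\n",
			r.Engine, r.Latencies.P99, r.ReviewsPerSecond, r.ViolationCount)
	}
}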
+func runBenchmark( + engine Engine, + templates []*unstructured.Unstructured, + constraints []*unstructured.Unstructured, + reviewObjs []*unstructured.Unstructured, + opts *Opts, +) (*Results, error) { + ctx := context.Background() + var setupBreakdown SetupBreakdown + var skippedTemplates []string + var skippedConstraints []string + loadedTemplateKinds := make(map[string]bool) + + // Create the client for this engine + setupStart := time.Now() + clientStart := time.Now() + client, err := makeClient(engine, opts.GatherStats) + if err != nil { + return nil, fmt.Errorf("creating client: %w", err) + } + setupBreakdown.ClientCreation = time.Since(clientStart) + + // Add templates (with skip support for incompatible templates) + templateStart := time.Now() + for _, obj := range templates { + templ, err := reader.ToTemplate(scheme, obj) + if err != nil { + return nil, fmt.Errorf("converting template %q: %w", obj.GetName(), err) + } + _, err = client.AddTemplate(ctx, templ) + if err != nil { + // Check if this is an engine compatibility issue + if errors.Is(err, clienterrors.ErrNoDriver) { + skippedTemplates = append(skippedTemplates, obj.GetName()) + continue + } + return nil, fmt.Errorf("adding template %q: %w", templ.GetName(), err) + } + // Track the constraint kind this template creates + loadedTemplateKinds[templ.Spec.CRD.Spec.Names.Kind] = true + } + setupBreakdown.TemplateCompilation = time.Since(templateStart) + + // Check if all templates were skipped + loadedTemplateCount := len(templates) - len(skippedTemplates) + if loadedTemplateCount == 0 { + return nil, fmt.Errorf("no templates compatible with %s engine (all %d templates skipped)", engine, len(templates)) + } + + // Add constraints (skip those whose template was skipped) + constraintStart := time.Now() + for _, obj := range constraints { + kind := obj.GetKind() + if !loadedTemplateKinds[kind] { + skippedConstraints = append(skippedConstraints, obj.GetName()) + continue + } + if _, err := client.AddConstraint(ctx, obj); err != nil { + return nil, fmt.Errorf("adding constraint %q: %w", obj.GetName(), err) + } + } + setupBreakdown.ConstraintLoading = time.Since(constraintStart) + + // Check if all constraints were skipped + loadedConstraintCount := len(constraints) - len(skippedConstraints) + if loadedConstraintCount == 0 { + return nil, fmt.Errorf("no constraints loaded (all %d constraints skipped due to missing templates)", len(constraints)) + } + + // Add all objects as data (for referential constraints) + // Note: CEL driver doesn't support referential constraints, so skip data loading for CEL + dataStart := time.Now() + var skippedDataObjects []string + referentialDataSupported := engine != EngineCEL + if referentialDataSupported { + for _, obj := range reviewObjs { + _, err := client.AddData(ctx, obj) + if err != nil { + return nil, fmt.Errorf("adding data %q: %w", obj.GetName(), err) + } + } + } + // Note: We don't populate skippedDataObjects for CEL engine because it's expected + // behavior (CEL doesn't support referential data), not an error. The + // ReferentialDataSupported field indicates this engine limitation. 
+ setupBreakdown.DataLoading = time.Since(dataStart) + + setupDuration := time.Since(setupStart) + + // Warmup phase + for i := 0; i < opts.Warmup; i++ { + for _, obj := range reviewObjs { + au := target.AugmentedUnstructured{ + Object: *obj, + Source: mutationtypes.SourceTypeOriginal, + } + if _, err := client.Review(ctx, au, reviews.EnforcementPoint(util.GatorEnforcementPoint)); err != nil { + return nil, fmt.Errorf("warmup review failed: %w", err) + } + } + } + + // Measurement phase + var durations []time.Duration + var totalViolations int64 + + // Memory profiling: capture memory stats before and after + var memStatsBefore, memStatsAfter runtime.MemStats + if opts.Memory { + runtime.GC() // Run GC to get clean baseline + runtime.ReadMemStats(&memStatsBefore) + } + + benchStart := time.Now() + + // Concurrent or sequential execution based on concurrency setting + var statsEntries []*instrumentation.StatsEntry + if opts.Concurrency > 1 { + durations, totalViolations, statsEntries, err = runConcurrentBenchmark(ctx, client, reviewObjs, opts) + if err != nil { + return nil, err + } + } else { + durations, totalViolations, statsEntries, err = runSequentialBenchmark(ctx, client, reviewObjs, opts) + if err != nil { + return nil, err + } + } + + totalDuration := time.Since(benchStart) + + // Capture memory stats after measurement + var memStats *MemoryStats + if opts.Memory { + runtime.ReadMemStats(&memStatsAfter) + totalReviewsForMem := uint64(opts.Iterations) * uint64(len(reviewObjs)) //nolint:gosec // overflow is acceptable for benchmark counts + if totalReviewsForMem > 0 { + totalAllocs := memStatsAfter.Mallocs - memStatsBefore.Mallocs + totalBytes := memStatsAfter.TotalAlloc - memStatsBefore.TotalAlloc + memStats = &MemoryStats{ + TotalAllocs: totalAllocs, + TotalBytes: totalBytes, + AllocsPerReview: totalAllocs / totalReviewsForMem, + BytesPerReview: totalBytes / totalReviewsForMem, + } + } + } + + // Calculate metrics + latencies := calculateLatencies(durations) + totalReviews := opts.Iterations * len(reviewObjs) + throughput := calculateThroughput(totalReviews, totalDuration) + + return &Results{ + Engine: engine, + TemplateCount: loadedTemplateCount, + ConstraintCount: loadedConstraintCount, + ObjectCount: len(reviewObjs), + Iterations: opts.Iterations, + Concurrency: opts.Concurrency, + SetupDuration: setupDuration, + SetupBreakdown: setupBreakdown, + TotalDuration: totalDuration, + Latencies: latencies, + ViolationCount: int(totalViolations), + ReviewsPerSecond: throughput, + MemoryStats: memStats, + StatsEntries: statsEntries, + SkippedTemplates: skippedTemplates, + SkippedConstraints: skippedConstraints, + SkippedDataObjects: skippedDataObjects, + ReferentialDataSupported: referentialDataSupported, + }, nil +} + +// makeClient creates a constraint client configured for the specified engine. 
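calculateLatencies and calculateThroughput are referenced above but defined outside this hunk. A minimal sketch of the computation they plausibly perform, assuming it lives in the same package (imports: sort, time) and uses a plain sort-and-index percentile; the actual implementation may interpolate differently:

func sketchLatencies(durations []time.Duration) Latencies {
	if len(durations) == 0 {
		return Latencies{}
	}
	// Sort a copy so percentiles can be read off by index.
	sorted := append([]time.Duration(nil), durations...)
	sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })

	var sum time.Duration
	for _, d := range sorted {
		sum += d
	}
	pct := func(p float64) time.Duration {
		return sorted[int(float64(len(sorted)-1)*p)]
	}
	return Latencies{
		Min:  sorted[0],
		Max:  sorted[len(sorted)-1],
		Mean: sum / time.Duration(len(sorted)),
		P50:  pct(0.50),
		P95:  pct(0.95),
		P99:  pct(0.99),
	}
}

// Throughput, as reported in Results above, is presumably reviews over wall time:
//	reviewsPerSecond := float64(totalReviews) / totalDuration.Seconds()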
+func makeClient(engine Engine, gatherStats bool) (*constraintclient.Client, error) { + args := []constraintclient.Opt{ + constraintclient.Targets(&target.K8sValidationTarget{}), + constraintclient.EnforcementPoints(util.GatorEnforcementPoint), + } + + switch engine { + case EngineRego: + driver, err := makeRegoDriver(gatherStats) + if err != nil { + return nil, err + } + args = append(args, constraintclient.Driver(driver)) + + case EngineCEL: + driver, err := makeCELDriver(gatherStats) + if err != nil { + return nil, err + } + args = append(args, constraintclient.Driver(driver)) + + default: + return nil, fmt.Errorf("unsupported engine: %s", engine) + } + + return constraintclient.NewClient(args...) +} + +func makeRegoDriver(gatherStats bool) (*rego.Driver, error) { + var args []rego.Arg + if gatherStats { + args = append(args, rego.GatherStats()) + } + return rego.New(args...) +} + +func makeCELDriver(gatherStats bool) (*k8scel.Driver, error) { + var args []k8scel.Arg + if gatherStats { + args = append(args, k8scel.GatherStats()) + } + return k8scel.New(args...) +} + +// runSequentialBenchmark runs the benchmark sequentially (single-threaded). +func runSequentialBenchmark( + ctx context.Context, + client *constraintclient.Client, + reviewObjs []*unstructured.Unstructured, + opts *Opts, +) ([]time.Duration, int64, []*instrumentation.StatsEntry, error) { + var durations []time.Duration + var totalViolations int64 + var statsEntries []*instrumentation.StatsEntry + + for i := 0; i < opts.Iterations; i++ { + for _, obj := range reviewObjs { + au := target.AugmentedUnstructured{ + Object: *obj, + Source: mutationtypes.SourceTypeOriginal, + } + + reviewStart := time.Now() + resp, err := client.Review(ctx, au, reviews.EnforcementPoint(util.GatorEnforcementPoint)) + reviewDuration := time.Since(reviewStart) + + if err != nil { + return nil, 0, nil, fmt.Errorf("review failed for %s/%s: %w", + obj.GetNamespace(), obj.GetName(), err) + } + + durations = append(durations, reviewDuration) + + // Count violations + for _, r := range resp.ByTarget { + totalViolations += int64(len(r.Results)) + } + + // Collect stats only from first iteration to avoid excessive data + if opts.GatherStats && i == 0 { + statsEntries = append(statsEntries, resp.StatsEntries...) + } + } + } + + return durations, totalViolations, statsEntries, nil +} + +// reviewResult holds the result of a single review for concurrent execution. +type reviewResult struct { + duration time.Duration + violations int + statsEntries []*instrumentation.StatsEntry + err error +} + +// runConcurrentBenchmark runs the benchmark with multiple goroutines. 
+func runConcurrentBenchmark( + ctx context.Context, + client *constraintclient.Client, + reviewObjs []*unstructured.Unstructured, + opts *Opts, +) ([]time.Duration, int64, []*instrumentation.StatsEntry, error) { + totalReviews := opts.Iterations * len(reviewObjs) + + // Create a cancellable context for error propagation + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // Create work items + type workItem struct { + iteration int + objIndex int + } + workChan := make(chan workItem, totalReviews) + for i := 0; i < opts.Iterations; i++ { + for j := range reviewObjs { + workChan <- workItem{iteration: i, objIndex: j} + } + } + close(workChan) + + // Result collection + resultsChan := make(chan reviewResult, totalReviews) + var wg sync.WaitGroup + var firstErr atomic.Value + + // Launch worker goroutines + for w := 0; w < opts.Concurrency; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for work := range workChan { + // Check if we should stop due to context cancellation + select { + case <-ctx.Done(): + return + default: + } + + obj := reviewObjs[work.objIndex] + au := target.AugmentedUnstructured{ + Object: *obj, + Source: mutationtypes.SourceTypeOriginal, + } + + reviewStart := time.Now() + resp, err := client.Review(ctx, au, reviews.EnforcementPoint(util.GatorEnforcementPoint)) + reviewDuration := time.Since(reviewStart) + + if err != nil { + firstErr.CompareAndSwap(nil, fmt.Errorf("review failed for %s/%s: %w", + obj.GetNamespace(), obj.GetName(), err)) + cancel() // Signal other goroutines to stop + resultsChan <- reviewResult{err: err} + return + } + + violations := 0 + for _, r := range resp.ByTarget { + violations += len(r.Results) + } + + // Collect stats only from first iteration to avoid excessive data + var stats []*instrumentation.StatsEntry + if opts.GatherStats && work.iteration == 0 { + stats = resp.StatsEntries + } + + resultsChan <- reviewResult{ + duration: reviewDuration, + violations: violations, + statsEntries: stats, + } + } + }() + } + + go func() { + wg.Wait() + close(resultsChan) + }() + + var durations []time.Duration + var totalViolations int64 + var statsEntries []*instrumentation.StatsEntry + + for result := range resultsChan { + if result.err != nil { + continue + } + durations = append(durations, result.duration) + totalViolations += int64(result.violations) + if len(result.statsEntries) > 0 { + statsEntries = append(statsEntries, result.statsEntries...) 
+ } + } + + if errVal := firstErr.Load(); errVal != nil { + if err, ok := errVal.(error); ok { + return nil, 0, nil, err + } + return nil, 0, nil, fmt.Errorf("unexpected non-error value stored in firstErr: %T", errVal) + } + + return durations, totalViolations, statsEntries, nil +} diff --git a/pkg/gator/bench/bench_test.go b/pkg/gator/bench/bench_test.go new file mode 100644 index 00000000000..503b5988690 --- /dev/null +++ b/pkg/gator/bench/bench_test.go @@ -0,0 +1,859 @@ +package bench + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestRun_MissingInputs(t *testing.T) { + _, err := Run(&Opts{ + Filenames: []string{}, + Iterations: 10, + Engine: EngineRego, + }) + if err == nil { + t.Error("expected error for missing inputs") + } +} + +func TestRun_NoTemplates(t *testing.T) { + // Create a temp file with just an object (no template) + tmpDir := t.TempDir() + objFile := filepath.Join(tmpDir, "object.yaml") + err := os.WriteFile(objFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write test file: %v", err) + } + + _, err = Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 1, + Engine: EngineRego, + }) + if err == nil { + t.Error("expected error for missing templates") + } +} + +func TestRun_Integration(t *testing.T) { + // Create temp files with a template, constraint, and object + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + validation: + openAPIV3Schema: + type: object + properties: + labels: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + provided := {label | input.review.object.metadata.labels[label]} + required := {label | label := input.parameters.labels[_]} + missing := required - provided + count(missing) > 0 + msg := sprintf("missing required labels: %v", [missing]) + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] + parameters: + labels: ["team"] +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object to review + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +spec: + containers: + - name: test + image: nginx +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run benchmark with Rego engine + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 5, + Warmup: 1, + Engine: EngineRego, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + + r := results[0] + if r.Engine != EngineRego { + t.Errorf("expected engine %s, got %s", EngineRego, r.Engine) + } + if r.TemplateCount != 1 { + t.Errorf("expected 1 template, got %d", r.TemplateCount) + } + if r.ConstraintCount 
!= 1 { + t.Errorf("expected 1 constraint, got %d", r.ConstraintCount) + } + if r.ObjectCount != 1 { + t.Errorf("expected 1 object, got %d", r.ObjectCount) + } + if r.Iterations != 5 { + t.Errorf("expected 5 iterations, got %d", r.Iterations) + } + // The pod is missing the required "team" label, so we expect violations + if r.ViolationCount == 0 { + t.Error("expected violations for missing label") + } + if r.ReviewsPerSecond <= 0 { + t.Error("expected positive throughput") + } +} + +func TestRun_AllEngines(t *testing.T) { + // Create temp files with a CEL-compatible template (using VAP code block) + tmpDir := t.TempDir() + + // Write template with both Rego and CEL validation + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + validation: + openAPIV3Schema: + type: object + properties: + labels: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + provided := {label | input.review.object.metadata.labels[label]} + required := {label | label := input.parameters.labels[_]} + missing := required - provided + count(missing) > 0 + msg := sprintf("missing required labels: %v", [missing]) + } + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "has(object.metadata.labels) && object.metadata.labels.all(label, label in variables.params.labels)" + message: "missing required labels" +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +spec: + parameters: + labels: ["team"] +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with EngineAll + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineAll, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + // Should have results for both engines + if len(results) != 2 { + t.Fatalf("expected 2 results for EngineAll, got %d", len(results)) + } + + // First result should be Rego + if results[0].Engine != EngineRego { + t.Errorf("expected first result to be rego, got %s", results[0].Engine) + } + // Second result should be CEL + if results[1].Engine != EngineCEL { + t.Errorf("expected second result to be cel, got %s", results[1].Engine) + } +} + +func TestRun_NoConstraints(t *testing.T) { + // Create a temp file with template but no constraint + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + 
if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write object (no constraint) + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + _, err = Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 1, + Engine: EngineRego, + }) + if err == nil { + t.Error("expected error for missing constraints") + } +} + +func TestRun_NoObjects(t *testing.T) { + // Create a temp file with template and constraint but no objects + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint only + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + _, err = Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 1, + Engine: EngineRego, + }) + if err == nil { + t.Error("expected error for missing objects to review") + } +} + +func TestRun_WithGatherStats(t *testing.T) { + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with GatherStats enabled + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineRego, + GatherStats: true, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } +} + +func TestRun_CELOnly(t *testing.T) { + tmpDir := t.TempDir() + + // Write template with CEL code block + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: 
admission.k8s.gatekeeper.sh + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "true" + message: "always pass" +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with CEL engine only + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineCEL, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + if results[0].Engine != EngineCEL { + t.Errorf("expected engine cel, got %s", results[0].Engine) + } +} + +func TestRun_SetupBreakdown(t *testing.T) { + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineRego, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + + r := results[0] + // Check that setup breakdown fields are populated + if r.SetupBreakdown.ClientCreation == 0 { + t.Error("expected ClientCreation to be non-zero") + } + if r.SetupBreakdown.TemplateCompilation == 0 { + t.Error("expected TemplateCompilation to be non-zero") + } + if r.SetupBreakdown.ConstraintLoading == 0 { + t.Error("expected ConstraintLoading to be non-zero") + } + // DataLoading can be zero if there are no objects to load as data +} + +func TestRun_SkippedTemplates(t *testing.T) { + tmpDir := t.TempDir() + + // Write Rego-only template (incompatible with CEL) + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package 
k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with EngineAll - CEL should fail but Rego should succeed + var buf bytes.Buffer + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineAll, + Writer: &buf, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + // Should have 1 result (only Rego succeeded) + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + + if results[0].Engine != EngineRego { + t.Errorf("expected engine rego, got %s", results[0].Engine) + } + + // Check that warning was written + output := buf.String() + if output == "" { + t.Error("expected warning about skipped CEL engine") + } +} + +func TestRun_Concurrent(t *testing.T) { + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + validation: + openAPIV3Schema: + type: object + properties: + labels: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + provided := {label | input.review.object.metadata.labels[label]} + required := {label | label := input.parameters.labels[_]} + missing := required - provided + count(missing) > 0 + msg := sprintf("missing required labels: %v", [missing]) + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] + parameters: + labels: ["team"] +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write multiple objects to review for concurrent testing + for i := 0; i < 3; i++ { + objectFile := filepath.Join(tmpDir, fmt.Sprintf("pod%d.yaml", i)) + err = os.WriteFile(objectFile, []byte(fmt.Sprintf(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod-%d +spec: + containers: + - name: test + image: nginx +`, i)), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + } + + // Run benchmark with concurrency > 1 + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 10, + Warmup: 1, + Engine: EngineRego, + Concurrency: 4, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + + r := results[0] + if r.Engine != EngineRego { + t.Errorf("expected engine %s, 
got %s", EngineRego, r.Engine) + } + if r.Concurrency != 4 { + t.Errorf("expected concurrency 4, got %d", r.Concurrency) + } + if r.TemplateCount != 1 { + t.Errorf("expected 1 template, got %d", r.TemplateCount) + } + if r.ConstraintCount != 1 { + t.Errorf("expected 1 constraint, got %d", r.ConstraintCount) + } + if r.ObjectCount != 3 { + t.Errorf("expected 3 objects, got %d", r.ObjectCount) + } + if r.Iterations != 10 { + t.Errorf("expected 10 iterations, got %d", r.Iterations) + } + // All pods are missing the required "team" label, so we expect violations + if r.ViolationCount == 0 { + t.Error("expected violations for missing labels") + } + if r.ReviewsPerSecond <= 0 { + t.Error("expected positive throughput") + } +} + +func TestRun_CELWithGatherStats(t *testing.T) { + tmpDir := t.TempDir() + + // Write template with CEL code block + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "true" + message: "always pass" +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with CEL engine and GatherStats enabled + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineCEL, + GatherStats: true, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + if results[0].Engine != EngineCEL { + t.Errorf("expected engine cel, got %s", results[0].Engine) + } +} + +func TestMakeClient_UnsupportedEngine(t *testing.T) { + _, err := makeClient(Engine("invalid"), false) + if err == nil { + t.Error("expected error for unsupported engine") + } + if !strings.Contains(err.Error(), "unsupported engine") { + t.Errorf("expected 'unsupported engine' error, got: %v", err) + } +} diff --git a/pkg/gator/bench/compare.go b/pkg/gator/bench/compare.go new file mode 100644 index 00000000000..5dde1ac0d73 --- /dev/null +++ b/pkg/gator/bench/compare.go @@ -0,0 +1,218 @@ +package bench + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/open-policy-agent/gatekeeper/v3/pkg/gator" + "sigs.k8s.io/yaml" +) + +// SaveResults saves benchmark results to a file in JSON or YAML format. +// The format is determined by the file extension (.json or .yaml/.yml). 
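Together with Compare and FormatComparison below, the save/load helpers support the --save/--compare flow wired up in cmd/gator/bench. A sketch of that round trip, using the names defined in this file (the opts value and baseline path are illustrative; error handling elided for brevity):

baseline, _ := bench.Run(opts)
_ = bench.SaveResults(baseline, "baseline.json") // file extension selects JSON vs YAML

// ...in a later run over the same inputs...
current, _ := bench.Run(opts)
loaded, _ := bench.LoadBaseline("baseline.json")

comps := bench.Compare(loaded, current, 10.0, time.Millisecond) // 10% threshold, 1ms floor
fmt.Print(bench.FormatComparison(comps, 10.0))
for _, c := range comps {
	if !c.Passed {
		os.Exit(1) // mirror the CLI's non-zero exit on regression
	}
}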
+func SaveResults(results []Results, path string) error { + ext := filepath.Ext(path) + + var data []byte + var err error + + switch ext { + case gator.ExtYAML, gator.ExtYML: + data, err = yaml.Marshal(results) + default: + // Default to JSON + data, err = json.MarshalIndent(results, "", " ") + } + if err != nil { + return fmt.Errorf("marshaling results: %w", err) + } + + if err := os.WriteFile(path, data, 0o600); err != nil { + return fmt.Errorf("writing results to %s: %w", path, err) + } + + return nil +} + +// LoadBaseline loads baseline results from a file. +// The format is determined by the file extension (.json or .yaml/.yml). +func LoadBaseline(path string) ([]Results, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading baseline from %s: %w", path, err) + } + + ext := filepath.Ext(path) + var results []Results + + switch ext { + case gator.ExtYAML, gator.ExtYML: + err = yaml.Unmarshal(data, &results) + default: + // Default to JSON + err = json.Unmarshal(data, &results) + } + if err != nil { + return nil, fmt.Errorf("unmarshaling baseline: %w", err) + } + + return results, nil +} + +// Compare compares current results against baseline results and returns comparison data. +// The threshold is the percentage change considered a regression (e.g., 10 means 10%). +// The minThreshold is the minimum absolute difference to consider a regression. +// For latency metrics, positive change = regression. For throughput, negative change = regression. +func Compare(baseline, current []Results, threshold float64, minThreshold time.Duration) []ComparisonResult { + var comparisons []ComparisonResult + + // Create a map of baseline results by engine for easy lookup + baselineByEngine := make(map[Engine]*Results) + for i := range baseline { + baselineByEngine[baseline[i].Engine] = &baseline[i] + } + + // Compare each current result against its baseline + for i := range current { + curr := ¤t[i] + base, ok := baselineByEngine[curr.Engine] + if !ok { + // No baseline for this engine, skip comparison + continue + } + + comparison := compareResults(base, curr, threshold, minThreshold) + comparisons = append(comparisons, comparison) + } + + return comparisons +} + +func compareResults(baseline, current *Results, threshold float64, minThreshold time.Duration) ComparisonResult { + var metrics []MetricComparison + var failedMetrics []string + allPassed := true + + // Compare latency metrics (higher is worse, so positive delta = regression) + latencyMetrics := []struct { + name string + baseline float64 + current float64 + }{ + {"P50 Latency", float64(baseline.Latencies.P50), float64(current.Latencies.P50)}, + {"P95 Latency", float64(baseline.Latencies.P95), float64(current.Latencies.P95)}, + {"P99 Latency", float64(baseline.Latencies.P99), float64(current.Latencies.P99)}, + {"Mean Latency", float64(baseline.Latencies.Mean), float64(current.Latencies.Mean)}, + } + + for _, m := range latencyMetrics { + delta := calculateDelta(m.baseline, m.current) + // For latency, check both percentage threshold AND minimum absolute threshold + // If minThreshold is set, ignore regressions smaller than the absolute minimum + absDiff := time.Duration(m.current) - time.Duration(m.baseline) + passed := delta <= threshold || (minThreshold > 0 && absDiff < minThreshold) + if !passed { + allPassed = false + failedMetrics = append(failedMetrics, m.name) + } + metrics = append(metrics, MetricComparison{ + Name: m.name, + Baseline: m.baseline, + Current: m.current, + Delta: delta, + Passed: 
passed, + }) + } + + // Compare throughput (lower is worse, so negative delta = regression) + throughputDelta := calculateDelta(baseline.ReviewsPerSecond, current.ReviewsPerSecond) + // For throughput, we invert the logic: negative delta is a regression + // If minThreshold is set, convert it to a throughput difference threshold + // A latency increase of minThreshold corresponds to a throughput change that we should ignore + throughputPassed := -throughputDelta <= threshold + if !throughputPassed && minThreshold > 0 && baseline.Latencies.Mean > 0 { + // Calculate the absolute throughput difference + absThroughputDiff := baseline.ReviewsPerSecond - current.ReviewsPerSecond + // Convert minThreshold to an equivalent throughput tolerance + // If we tolerate minThreshold latency change, we should tolerate proportional throughput change + // Use baseline throughput to derive a reasonable tolerance from the latency threshold + // throughput ≈ 1/latency, so tolerance should be proportional to baseline throughput + minThroughputDiff := baseline.ReviewsPerSecond * (float64(minThreshold) / float64(baseline.Latencies.Mean)) + if absThroughputDiff < minThroughputDiff { + throughputPassed = true + } + } + if !throughputPassed { + allPassed = false + failedMetrics = append(failedMetrics, "Throughput") + } + metrics = append(metrics, MetricComparison{ + Name: "Throughput", + Baseline: baseline.ReviewsPerSecond, + Current: current.ReviewsPerSecond, + Delta: throughputDelta, + Passed: throughputPassed, + }) + + // Compare memory stats if available + // Note: minThreshold is a time.Duration and applies only to latency/throughput metrics. + // Memory metrics are evaluated strictly against the percentage threshold. + if baseline.MemoryStats != nil && current.MemoryStats != nil { + allocsDelta := calculateDelta( + float64(baseline.MemoryStats.AllocsPerReview), + float64(current.MemoryStats.AllocsPerReview), + ) + allocsPassed := allocsDelta <= threshold + if !allocsPassed { + allPassed = false + failedMetrics = append(failedMetrics, "Allocs/Review") + } + metrics = append(metrics, MetricComparison{ + Name: "Allocs/Review", + Baseline: float64(baseline.MemoryStats.AllocsPerReview), + Current: float64(current.MemoryStats.AllocsPerReview), + Delta: allocsDelta, + Passed: allocsPassed, + }) + + bytesDelta := calculateDelta( + float64(baseline.MemoryStats.BytesPerReview), + float64(current.MemoryStats.BytesPerReview), + ) + bytesPassed := bytesDelta <= threshold + if !bytesPassed { + allPassed = false + failedMetrics = append(failedMetrics, "Bytes/Review") + } + metrics = append(metrics, MetricComparison{ + Name: "Bytes/Review", + Baseline: float64(baseline.MemoryStats.BytesPerReview), + Current: float64(current.MemoryStats.BytesPerReview), + Delta: bytesDelta, + Passed: bytesPassed, + }) + } + + return ComparisonResult{ + BaselineEngine: baseline.Engine, + CurrentEngine: current.Engine, + Metrics: metrics, + Passed: allPassed, + FailedMetrics: failedMetrics, + } +} + +// calculateDelta calculates the percentage change from baseline to current. +// Returns positive value if current > baseline (regression for latency metrics). 
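To make the threshold interaction concrete, here is the gate from compareResults worked through with the same numbers as the "min threshold bypasses percentage regression" test in compare_test.go below:

// baseline P50 = 100µs, current P50 = 120µs, threshold = 10%, minThreshold = 1ms
//
//	delta   = (120µs - 100µs) / 100µs * 100 = 20%   -> exceeds the 10% threshold
//	absDiff = 120µs - 100µs = 20µs                  -> below the 1ms absolute floor
//	passed  = delta <= threshold || (minThreshold > 0 && absDiff < minThreshold)
//	        = false || true = true
//
// With minThreshold = 0 the second clause never applies, so the same 20% swing fails.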
+func calculateDelta(baseline, current float64) float64 { + if baseline == 0 { + if current == 0 { + return 0 + } + return 100 // Infinite increase represented as 100% + } + return ((current - baseline) / baseline) * 100 +} diff --git a/pkg/gator/bench/compare_test.go b/pkg/gator/bench/compare_test.go new file mode 100644 index 00000000000..cf8a71b7725 --- /dev/null +++ b/pkg/gator/bench/compare_test.go @@ -0,0 +1,416 @@ +package bench + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestSaveAndLoadResults(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 5, + ConstraintCount: 10, + ObjectCount: 100, + Iterations: 50, + SetupDuration: time.Second, + TotalDuration: 5 * time.Second, + Latencies: Latencies{ + Min: 100 * time.Microsecond, + Max: 10 * time.Millisecond, + Mean: 1 * time.Millisecond, + P50: 900 * time.Microsecond, + P95: 5 * time.Millisecond, + P99: 8 * time.Millisecond, + }, + ViolationCount: 25, + ReviewsPerSecond: 1000, + MemoryStats: &MemoryStats{ + AllocsPerReview: 500, + BytesPerReview: 10240, + TotalAllocs: 25000, + TotalBytes: 512000, + }, + }, + } + + t.Run("JSON format", func(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "baseline.json") + + // Save + err := SaveResults(results, path) + if err != nil { + t.Fatalf("SaveResults failed: %v", err) + } + + // Verify file exists + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Fatalf("file was not created") + } + + // Load + loaded, err := LoadBaseline(path) + if err != nil { + t.Fatalf("LoadBaseline failed: %v", err) + } + + if len(loaded) != 1 { + t.Fatalf("expected 1 result, got %d", len(loaded)) + } + + if loaded[0].Engine != EngineRego { + t.Errorf("Engine = %v, want %v", loaded[0].Engine, EngineRego) + } + if loaded[0].ReviewsPerSecond != 1000 { + t.Errorf("ReviewsPerSecond = %v, want %v", loaded[0].ReviewsPerSecond, 1000) + } + }) + + t.Run("YAML format", func(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "baseline.yaml") + + // Save + err := SaveResults(results, path) + if err != nil { + t.Fatalf("SaveResults failed: %v", err) + } + + // Load + loaded, err := LoadBaseline(path) + if err != nil { + t.Fatalf("LoadBaseline failed: %v", err) + } + + if len(loaded) != 1 { + t.Fatalf("expected 1 result, got %d", len(loaded)) + } + + if loaded[0].Engine != EngineRego { + t.Errorf("Engine = %v, want %v", loaded[0].Engine, EngineRego) + } + }) + + t.Run("YML extension", func(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "baseline.yml") + + // Save + err := SaveResults(results, path) + if err != nil { + t.Fatalf("SaveResults failed: %v", err) + } + + // Load + loaded, err := LoadBaseline(path) + if err != nil { + t.Fatalf("LoadBaseline failed: %v", err) + } + + if len(loaded) != 1 { + t.Fatalf("expected 1 result, got %d", len(loaded)) + } + }) +} + +func TestLoadBaseline_FileNotFound(t *testing.T) { + _, err := LoadBaseline("/nonexistent/path/baseline.json") + if err == nil { + t.Fatal("expected error for non-existent file") + } +} + +func TestCompare(t *testing.T) { + baseline := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 1 * time.Millisecond, + P95: 5 * time.Millisecond, + P99: 10 * time.Millisecond, + Mean: 2 * time.Millisecond, + }, + ReviewsPerSecond: 1000, + MemoryStats: &MemoryStats{ + AllocsPerReview: 500, + BytesPerReview: 10240, + }, + }, + } + + t.Run("no regression", func(t *testing.T) { + current := []Results{ + { + Engine: EngineRego, + 
Latencies: Latencies{ + P50: 1050 * time.Microsecond, // 5% increase + P95: 5 * time.Millisecond, + P99: 10 * time.Millisecond, + Mean: 2 * time.Millisecond, + }, + ReviewsPerSecond: 950, // 5% decrease + MemoryStats: &MemoryStats{ + AllocsPerReview: 520, // 4% increase + BytesPerReview: 10500, + }, + }, + } + + comparisons := Compare(baseline, current, 10.0, 0) + if len(comparisons) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisons)) + } + + if !comparisons[0].Passed { + t.Errorf("expected comparison to pass, got failed metrics: %v", comparisons[0].FailedMetrics) + } + }) + + t.Run("latency regression", func(t *testing.T) { + current := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 1500 * time.Microsecond, // 50% increase - regression! + P95: 5 * time.Millisecond, + P99: 10 * time.Millisecond, + Mean: 2 * time.Millisecond, + }, + ReviewsPerSecond: 1000, + }, + } + + comparisons := Compare(baseline, current, 10.0, 0) + if len(comparisons) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisons)) + } + + if comparisons[0].Passed { + t.Error("expected comparison to fail due to latency regression") + } + if len(comparisons[0].FailedMetrics) == 0 { + t.Error("expected failed metrics to be populated") + } + }) + + t.Run("throughput regression", func(t *testing.T) { + current := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 1 * time.Millisecond, + P95: 5 * time.Millisecond, + P99: 10 * time.Millisecond, + Mean: 2 * time.Millisecond, + }, + ReviewsPerSecond: 800, // 20% decrease - regression! + }, + } + + comparisons := Compare(baseline, current, 10.0, 0) + if len(comparisons) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisons)) + } + + if comparisons[0].Passed { + t.Error("expected comparison to fail due to throughput regression") + } + + foundThroughput := false + for _, m := range comparisons[0].FailedMetrics { + if m == "Throughput" { + foundThroughput = true + break + } + } + if !foundThroughput { + t.Error("expected Throughput to be in failed metrics") + } + }) + + t.Run("no matching engine", func(t *testing.T) { + current := []Results{ + { + Engine: EngineCEL, // Different engine + Latencies: Latencies{ + P50: 1 * time.Millisecond, + }, + ReviewsPerSecond: 1000, + }, + } + + comparisons := Compare(baseline, current, 10.0, 0) + if len(comparisons) != 0 { + t.Errorf("expected 0 comparisons for non-matching engine, got %d", len(comparisons)) + } + }) + + t.Run("min threshold bypasses percentage regression", func(t *testing.T) { + // Use a fast baseline where percentage changes are noise + fastBaseline := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 100 * time.Microsecond, + P95: 200 * time.Microsecond, + P99: 300 * time.Microsecond, + Mean: 150 * time.Microsecond, + }, + ReviewsPerSecond: 10000, + }, + } + + current := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 120 * time.Microsecond, // 20% increase but only 20µs + P95: 240 * time.Microsecond, // 20% increase but only 40µs + P99: 360 * time.Microsecond, // 20% increase but only 60µs + Mean: 180 * time.Microsecond, // 20% increase but only 30µs + }, + ReviewsPerSecond: 8000, // 20% decrease + }, + } + + // Without min threshold, this would fail (20% > 10%) + comparisonsWithoutMin := Compare(fastBaseline, current, 10.0, 0) + if len(comparisonsWithoutMin) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisonsWithoutMin)) + } + if comparisonsWithoutMin[0].Passed { + t.Error("expected comparison 
without min-threshold to fail") + } + + // With min threshold of 1s, all changes should pass as the absolute differences + // are well below the min-threshold tolerance + comparisonsWithMin := Compare(fastBaseline, current, 10.0, 1*time.Second) + if len(comparisonsWithMin) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisonsWithMin)) + } + + // With a large min threshold, the comparison should pass since all differences + // are below the min-threshold tolerance (including throughput) + if !comparisonsWithMin[0].Passed { + t.Errorf("expected comparison with large min-threshold (1s) to pass, got failed metrics: %v", comparisonsWithMin[0].FailedMetrics) + } + }) +} + +func TestCalculateDelta(t *testing.T) { + tests := []struct { + name string + baseline float64 + current float64 + want float64 + }{ + { + name: "no change", + baseline: 100, + current: 100, + want: 0, + }, + { + name: "10% increase", + baseline: 100, + current: 110, + want: 10, + }, + { + name: "10% decrease", + baseline: 100, + current: 90, + want: -10, + }, + { + name: "zero baseline with current", + baseline: 0, + current: 100, + want: 100, + }, + { + name: "both zero", + baseline: 0, + current: 0, + want: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := calculateDelta(tt.baseline, tt.current) + if got != tt.want { + t.Errorf("calculateDelta(%v, %v) = %v, want %v", + tt.baseline, tt.current, got, tt.want) + } + }) + } +} + +func TestFormatComparison(t *testing.T) { + comparisons := []ComparisonResult{ + { + BaselineEngine: EngineRego, + CurrentEngine: EngineRego, + Metrics: []MetricComparison{ + {Name: "P50 Latency", Baseline: 1000000, Current: 1100000, Delta: 10, Passed: true}, + {Name: "Throughput", Baseline: 1000, Current: 900, Delta: -10, Passed: true}, + }, + Passed: true, + FailedMetrics: nil, + }, + } + + output := FormatComparison(comparisons, 10.0) + + // Check that output contains expected strings + if output == "" { + t.Error("expected non-empty output") + } + + expectedStrings := []string{ + "Baseline Comparison", + "REGO", + "P50 Latency", + "Throughput", + "No significant regressions", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("expected output to contain %q", s) + } + } +} + +func TestFormatComparison_WithRegression(t *testing.T) { + comparisons := []ComparisonResult{ + { + BaselineEngine: EngineRego, + CurrentEngine: EngineRego, + Metrics: []MetricComparison{ + {Name: "P50 Latency", Baseline: 1000000, Current: 1500000, Delta: 50, Passed: false}, + }, + Passed: false, + FailedMetrics: []string{"P50 Latency"}, + }, + } + + output := FormatComparison(comparisons, 10.0) + + expectedStrings := []string{ + "REGRESSION", + "Regressions detected", + "P50 Latency", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("expected output to contain %q", s) + } + } +} diff --git a/pkg/gator/bench/metrics.go b/pkg/gator/bench/metrics.go new file mode 100644 index 00000000000..38ae7c6c7a1 --- /dev/null +++ b/pkg/gator/bench/metrics.go @@ -0,0 +1,66 @@ +package bench + +import ( + "sort" + "time" +) + +// calculateLatencies computes latency statistics from a slice of durations. 
+func calculateLatencies(durations []time.Duration) Latencies { + if len(durations) == 0 { + return Latencies{} + } + + // Sort for percentile calculation + sorted := make([]time.Duration, len(durations)) + copy(sorted, durations) + sort.Slice(sorted, func(i, j int) bool { + return sorted[i] < sorted[j] + }) + + var total time.Duration + for _, d := range sorted { + total += d + } + + return Latencies{ + Min: sorted[0], + Max: sorted[len(sorted)-1], + Mean: time.Duration(int64(total) / int64(len(sorted))), + P50: percentile(sorted, 50), + P95: percentile(sorted, 95), + P99: percentile(sorted, 99), + } +} + +// percentile calculates the p-th percentile from a sorted slice of durations. +// The input slice must be sorted in ascending order. +func percentile(sorted []time.Duration, p float64) time.Duration { + if len(sorted) == 0 { + return 0 + } + if len(sorted) == 1 { + return sorted[0] + } + + // Calculate the index using the nearest-rank method + rank := (p / 100.0) * float64(len(sorted)-1) + lower := int(rank) + upper := lower + 1 + + if upper >= len(sorted) { + return sorted[len(sorted)-1] + } + + // Linear interpolation between the two nearest ranks + weight := rank - float64(lower) + return time.Duration(float64(sorted[lower])*(1-weight) + float64(sorted[upper])*weight) +} + +// calculateThroughput computes reviews per second. +func calculateThroughput(reviewCount int, duration time.Duration) float64 { + if duration == 0 { + return 0 + } + return float64(reviewCount) / duration.Seconds() +} diff --git a/pkg/gator/bench/metrics_test.go b/pkg/gator/bench/metrics_test.go new file mode 100644 index 00000000000..4b718e14b0d --- /dev/null +++ b/pkg/gator/bench/metrics_test.go @@ -0,0 +1,187 @@ +package bench + +import ( + "testing" + "time" +) + +func TestCalculateLatencies(t *testing.T) { + tests := []struct { + name string + durations []time.Duration + wantMin time.Duration + wantMax time.Duration + wantMean time.Duration + }{ + { + name: "empty slice", + durations: []time.Duration{}, + wantMin: 0, + wantMax: 0, + wantMean: 0, + }, + { + name: "single duration", + durations: []time.Duration{100 * time.Millisecond}, + wantMin: 100 * time.Millisecond, + wantMax: 100 * time.Millisecond, + wantMean: 100 * time.Millisecond, + }, + { + name: "multiple durations", + durations: []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 30 * time.Millisecond, + 40 * time.Millisecond, + 50 * time.Millisecond, + }, + wantMin: 10 * time.Millisecond, + wantMax: 50 * time.Millisecond, + wantMean: 30 * time.Millisecond, + }, + { + name: "unsorted durations", + durations: []time.Duration{ + 50 * time.Millisecond, + 10 * time.Millisecond, + 30 * time.Millisecond, + 20 * time.Millisecond, + 40 * time.Millisecond, + }, + wantMin: 10 * time.Millisecond, + wantMax: 50 * time.Millisecond, + wantMean: 30 * time.Millisecond, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := calculateLatencies(tt.durations) + + if got.Min != tt.wantMin { + t.Errorf("Min = %v, want %v", got.Min, tt.wantMin) + } + if got.Max != tt.wantMax { + t.Errorf("Max = %v, want %v", got.Max, tt.wantMax) + } + if got.Mean != tt.wantMean { + t.Errorf("Mean = %v, want %v", got.Mean, tt.wantMean) + } + }) + } +} + +func TestPercentile(t *testing.T) { + tests := []struct { + name string + sorted []time.Duration + p float64 + want time.Duration + }{ + { + name: "empty slice", + sorted: []time.Duration{}, + p: 50, + want: 0, + }, + { + name: "single element p50", + sorted: []time.Duration{100 * 
time.Millisecond}, + p: 50, + want: 100 * time.Millisecond, + }, + { + name: "p50 odd count", + sorted: []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 30 * time.Millisecond, + 40 * time.Millisecond, + 50 * time.Millisecond, + }, + p: 50, + want: 30 * time.Millisecond, + }, + { + name: "p99 many elements", + sorted: []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 30 * time.Millisecond, + 40 * time.Millisecond, + 50 * time.Millisecond, + }, + p: 99, + want: 49600 * time.Microsecond, // interpolated + }, + { + name: "p100 returns last element", + sorted: []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 30 * time.Millisecond, + }, + p: 100, + want: 30 * time.Millisecond, // upper >= len case + }, + { + name: "two elements p0", + sorted: []time.Duration{10 * time.Millisecond, 20 * time.Millisecond}, + p: 0, + want: 10 * time.Millisecond, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := percentile(tt.sorted, tt.p) + // Allow 1ms tolerance for interpolation + diff := got - tt.want + if diff < 0 { + diff = -diff + } + if diff > time.Millisecond { + t.Errorf("percentile(%v, %v) = %v, want %v", tt.sorted, tt.p, got, tt.want) + } + }) + } +} + +func TestCalculateThroughput(t *testing.T) { + tests := []struct { + name string + reviewCount int + duration time.Duration + want float64 + }{ + { + name: "zero duration", + reviewCount: 100, + duration: 0, + want: 0, + }, + { + name: "1 second duration", + reviewCount: 100, + duration: time.Second, + want: 100, + }, + { + name: "500ms duration", + reviewCount: 50, + duration: 500 * time.Millisecond, + want: 100, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := calculateThroughput(tt.reviewCount, tt.duration) + if got != tt.want { + t.Errorf("calculateThroughput(%v, %v) = %v, want %v", + tt.reviewCount, tt.duration, got, tt.want) + } + }) + } +} diff --git a/pkg/gator/bench/output.go b/pkg/gator/bench/output.go new file mode 100644 index 00000000000..f77cecf31a3 --- /dev/null +++ b/pkg/gator/bench/output.go @@ -0,0 +1,595 @@ +package bench + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "strings" + "text/tabwriter" + "time" + + "gopkg.in/yaml.v3" +) + +// OutputFormat represents the output format for benchmark results. +type OutputFormat string + +const ( + // OutputFormatTable outputs results as a human-readable table. + OutputFormatTable OutputFormat = "table" + // OutputFormatJSON outputs results as JSON. + OutputFormatJSON OutputFormat = "json" + // OutputFormatYAML outputs results as YAML. + OutputFormatYAML OutputFormat = "yaml" +) + +// ParseOutputFormat parses a string into an OutputFormat. +func ParseOutputFormat(s string) (OutputFormat, error) { + switch strings.ToLower(s) { + case "", "table": + return OutputFormatTable, nil + case "json": + return OutputFormatJSON, nil + case "yaml": + return OutputFormatYAML, nil + default: + return "", fmt.Errorf("invalid output format: %q (valid: table, json, yaml)", s) + } +} + +// FormatResults formats benchmark results according to the specified format. +func FormatResults(results []Results, format OutputFormat) (string, error) { + switch format { + case OutputFormatJSON: + return formatJSON(results) + case OutputFormatYAML: + return formatYAML(results) + case OutputFormatTable: + fallthrough + default: + return formatTable(results), nil + } +} + +// FormatComparison formats comparison results for display. 
+func FormatComparison(comparisons []ComparisonResult, threshold float64) string { + var buf bytes.Buffer + + for i, comp := range comparisons { + if i > 0 { + buf.WriteString("\n") + } + writeComparisonResult(&buf, &comp, threshold) + } + + return buf.String() +} + +func writeComparisonResult(w io.Writer, comp *ComparisonResult, threshold float64) { + fmt.Fprintf(w, "=== Baseline Comparison: %s Engine ===\n\n", + strings.ToUpper(string(comp.CurrentEngine))) + + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + + // Header + fmt.Fprintln(tw, "Metric\tBaseline\tCurrent\tDelta\tStatus") + fmt.Fprintln(tw, "------\t--------\t-------\t-----\t------") + + for _, m := range comp.Metrics { + status := "✓" + if !m.Passed { + status = "✗ REGRESSION" + } + + // Format values based on metric type + var baselineStr, currentStr string + switch { + case strings.Contains(m.Name, "Latency"): + baselineStr = formatDuration(time.Duration(m.Baseline)) + currentStr = formatDuration(time.Duration(m.Current)) + case strings.Contains(m.Name, "Bytes"): + baselineStr = formatBytes(uint64(m.Baseline)) + currentStr = formatBytes(uint64(m.Current)) + case strings.Contains(m.Name, "Throughput"): + baselineStr = fmt.Sprintf("%.2f/sec", m.Baseline) + currentStr = fmt.Sprintf("%.2f/sec", m.Current) + default: + baselineStr = fmt.Sprintf("%.0f", m.Baseline) + currentStr = fmt.Sprintf("%.0f", m.Current) + } + + deltaStr := fmt.Sprintf("%+.1f%%", m.Delta) + fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\n", + m.Name, baselineStr, currentStr, deltaStr, status) + } + tw.Flush() + + fmt.Fprintln(w) + if comp.Passed { + fmt.Fprintf(w, "✓ No significant regressions (threshold: %.1f%%)\n", threshold) + } else { + fmt.Fprintf(w, "✗ Regressions detected in: %s (threshold: %.1f%%)\n", + strings.Join(comp.FailedMetrics, ", "), threshold) + } +} + +func formatJSON(results []Results) (string, error) { + // Convert to JSON-friendly format with string durations + jsonResults := toJSONResults(results) + b, err := json.MarshalIndent(jsonResults, "", " ") + if err != nil { + return "", fmt.Errorf("marshaling JSON: %w", err) + } + return string(b), nil +} + +func formatYAML(results []Results) (string, error) { + // Convert to YAML-friendly format with string durations + yamlResults := toJSONResults(results) + b, err := yaml.Marshal(yamlResults) + if err != nil { + return "", fmt.Errorf("marshaling YAML: %w", err) + } + return string(b), nil +} + +func formatTable(results []Results) string { + var buf bytes.Buffer + + // Write individual result tables + for i := range results { + if i > 0 { + buf.WriteString("\n") + } + writeResultTable(&buf, &results[i]) + } + + // Write comparison table if multiple engines + if len(results) > 1 { + buf.WriteString("\n") + writeComparisonTable(&buf, results) + } + + return buf.String() +} + +func writeResultTable(w io.Writer, r *Results) { + fmt.Fprintf(w, "=== Benchmark Results: %s Engine ===\n\n", strings.ToUpper(string(r.Engine))) + + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + + // Configuration section + fmt.Fprintln(tw, "Configuration:") + fmt.Fprintf(tw, " Templates:\t%d\n", r.TemplateCount) + fmt.Fprintf(tw, " Constraints:\t%d\n", r.ConstraintCount) + fmt.Fprintf(tw, " Objects:\t%d\n", r.ObjectCount) + fmt.Fprintf(tw, " Iterations:\t%d\n", r.Iterations) + if r.Concurrency > 1 { + fmt.Fprintf(tw, " Concurrency:\t%d\n", r.Concurrency) + } + fmt.Fprintf(tw, " Total Reviews:\t%d\n", r.Iterations*r.ObjectCount) + fmt.Fprintln(tw) + + // Skipped templates/constraints/data warning + if len(r.SkippedTemplates) > 0 || 
len(r.SkippedConstraints) > 0 || len(r.SkippedDataObjects) > 0 { + fmt.Fprintln(tw, "Warnings:") + if len(r.SkippedTemplates) > 0 { + fmt.Fprintf(tw, " Skipped Templates:\t%d (%s)\n", + len(r.SkippedTemplates), strings.Join(r.SkippedTemplates, ", ")) + } + if len(r.SkippedConstraints) > 0 { + fmt.Fprintf(tw, " Skipped Constraints:\t%d (%s)\n", + len(r.SkippedConstraints), strings.Join(r.SkippedConstraints, ", ")) + } + if len(r.SkippedDataObjects) > 0 { + fmt.Fprintf(tw, " Skipped Data Objects:\t%d (failed to load as referential data)\n", + len(r.SkippedDataObjects)) + // Show first few objects if not too many + if len(r.SkippedDataObjects) <= 5 { + fmt.Fprintf(tw, " Objects:\t%s\n", strings.Join(r.SkippedDataObjects, ", ")) + } else { + fmt.Fprintf(tw, " Objects:\t%s, ... (and %d more)\n", + strings.Join(r.SkippedDataObjects[:5], ", "), len(r.SkippedDataObjects)-5) + } + } + fmt.Fprintln(tw) + } + + // Informational note about engine limitations (not a warning) + if !r.ReferentialDataSupported { + fmt.Fprintln(tw, "Note:") + fmt.Fprintf(tw, " Referential Data:\tNot supported by %s engine\n", r.Engine) + fmt.Fprintln(tw, " \t(Referential constraints cannot be exercised with this engine)") + fmt.Fprintln(tw) + } + + // Timing section with breakdown + fmt.Fprintln(tw, "Timing:") + fmt.Fprintf(tw, " Setup Duration:\t%s\n", formatDuration(r.SetupDuration)) + if r.SetupBreakdown.ClientCreation > 0 { + fmt.Fprintf(tw, " └─ Client Creation:\t%s\n", formatDuration(r.SetupBreakdown.ClientCreation)) + fmt.Fprintf(tw, " └─ Template Compilation:\t%s\n", formatDuration(r.SetupBreakdown.TemplateCompilation)) + fmt.Fprintf(tw, " └─ Constraint Loading:\t%s\n", formatDuration(r.SetupBreakdown.ConstraintLoading)) + fmt.Fprintf(tw, " └─ Data Loading:\t%s\n", formatDuration(r.SetupBreakdown.DataLoading)) + } + fmt.Fprintf(tw, " Total Duration:\t%s\n", formatDuration(r.TotalDuration)) + fmt.Fprintf(tw, " Throughput:\t%.2f reviews/sec\n", r.ReviewsPerSecond) + fmt.Fprintln(tw) + + // Latency section + fmt.Fprintln(tw, "Latency (per review):") + fmt.Fprintf(tw, " Min:\t%s\n", formatDuration(r.Latencies.Min)) + fmt.Fprintf(tw, " Max:\t%s\n", formatDuration(r.Latencies.Max)) + fmt.Fprintf(tw, " Mean:\t%s\n", formatDuration(r.Latencies.Mean)) + fmt.Fprintf(tw, " P50:\t%s\n", formatDuration(r.Latencies.P50)) + fmt.Fprintf(tw, " P95:\t%s\n", formatDuration(r.Latencies.P95)) + fmt.Fprintf(tw, " P99:\t%s\n", formatDuration(r.Latencies.P99)) + fmt.Fprintln(tw) + + // Results section + fmt.Fprintln(tw, "Results:") + fmt.Fprintf(tw, " Violations Found:\t%d\n", r.ViolationCount) + + // Memory section (if available) + if r.MemoryStats != nil { + fmt.Fprintln(tw) + fmt.Fprintln(tw, "Memory:") + fmt.Fprintf(tw, " Allocs/Review:\t%d\n", r.MemoryStats.AllocsPerReview) + fmt.Fprintf(tw, " Bytes/Review:\t%s\n", formatBytes(r.MemoryStats.BytesPerReview)) + fmt.Fprintf(tw, " Total Allocs:\t%d\n", r.MemoryStats.TotalAllocs) + fmt.Fprintf(tw, " Total Bytes:\t%s\n", formatBytes(r.MemoryStats.TotalBytes)) + } + + // Stats section (if available) + if len(r.StatsEntries) > 0 { + fmt.Fprintln(tw) + fmt.Fprintln(tw, "Per-Constraint Statistics (from first iteration):") + for _, entry := range r.StatsEntries { + if entry == nil { + continue + } + // Include StatsFor to identify which constraint/template produced the stat + if entry.StatsFor != "" { + fmt.Fprintf(tw, " Constraint: %s (Scope: %s)\n", entry.StatsFor, entry.Scope) + } else { + fmt.Fprintf(tw, " Scope: %s\n", entry.Scope) + } + for _, stat := range entry.Stats { + if stat == nil 
{ + continue + } + fmt.Fprintf(tw, " %s:\t%v %s\n", stat.Name, stat.Value, stat.Source.Type) + } + } + } + + tw.Flush() +} + +// writeComparisonTable writes a side-by-side comparison of engine results. +func writeComparisonTable(w io.Writer, results []Results) { + fmt.Fprintln(w, "=== Engine Comparison ===") + fmt.Fprintln(w) + + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + + // Header row + fmt.Fprint(tw, "Metric") + for i := range results { + fmt.Fprintf(tw, "\t%s", strings.ToUpper(string(results[i].Engine))) + } + fmt.Fprintln(tw) + + // Separator + fmt.Fprint(tw, "------") + for range results { + fmt.Fprint(tw, "\t------") + } + fmt.Fprintln(tw) + + // Templates + fmt.Fprint(tw, "Templates") + for i := range results { + fmt.Fprintf(tw, "\t%d", results[i].TemplateCount) + } + fmt.Fprintln(tw) + + // Constraints + fmt.Fprint(tw, "Constraints") + for i := range results { + fmt.Fprintf(tw, "\t%d", results[i].ConstraintCount) + } + fmt.Fprintln(tw) + + // Setup Duration + fmt.Fprint(tw, "Setup Time") + for i := range results { + fmt.Fprintf(tw, "\t%s", formatDuration(results[i].SetupDuration)) + } + fmt.Fprintln(tw) + + // Throughput + fmt.Fprint(tw, "Throughput") + for i := range results { + fmt.Fprintf(tw, "\t%.2f/sec", results[i].ReviewsPerSecond) + } + fmt.Fprintln(tw) + + // Mean Latency + fmt.Fprint(tw, "Mean Latency") + for i := range results { + fmt.Fprintf(tw, "\t%s", formatDuration(results[i].Latencies.Mean)) + } + fmt.Fprintln(tw) + + // P95 Latency + fmt.Fprint(tw, "P95 Latency") + for i := range results { + fmt.Fprintf(tw, "\t%s", formatDuration(results[i].Latencies.P95)) + } + fmt.Fprintln(tw) + + // P99 Latency + fmt.Fprint(tw, "P99 Latency") + for i := range results { + fmt.Fprintf(tw, "\t%s", formatDuration(results[i].Latencies.P99)) + } + fmt.Fprintln(tw) + + // Violations + fmt.Fprint(tw, "Violations") + for i := range results { + fmt.Fprintf(tw, "\t%d", results[i].ViolationCount) + } + fmt.Fprintln(tw) + + // Memory stats (if available) + hasMemory := false + for i := range results { + if results[i].MemoryStats != nil { + hasMemory = true + break + } + } + if hasMemory { + fmt.Fprint(tw, "Allocs/Review") + for i := range results { + if results[i].MemoryStats != nil { + fmt.Fprintf(tw, "\t%d", results[i].MemoryStats.AllocsPerReview) + } else { + fmt.Fprint(tw, "\t-") + } + } + fmt.Fprintln(tw) + + fmt.Fprint(tw, "Bytes/Review") + for i := range results { + if results[i].MemoryStats != nil { + fmt.Fprintf(tw, "\t%s", formatBytes(results[i].MemoryStats.BytesPerReview)) + } else { + fmt.Fprint(tw, "\t-") + } + } + fmt.Fprintln(tw) + } + + tw.Flush() + + // Show performance difference if exactly 2 engines + if len(results) == 2 { + fmt.Fprintln(w) + writePerfDiff(w, &results[0], &results[1]) + } +} + +// writePerfDiff writes a performance comparison between two engines. 
+func writePerfDiff(w io.Writer, r1, r2 *Results) { + // Calculate throughput ratio + if r1.ReviewsPerSecond <= 0 || r2.ReviewsPerSecond <= 0 { + return + } + + switch { + case r1.ReviewsPerSecond > r2.ReviewsPerSecond: + ratio := r1.ReviewsPerSecond / r2.ReviewsPerSecond + fmt.Fprintf(w, "Performance: %s is %.2fx faster than %s\n", + strings.ToUpper(string(r1.Engine)), ratio, strings.ToUpper(string(r2.Engine))) + case r2.ReviewsPerSecond > r1.ReviewsPerSecond: + ratio := r2.ReviewsPerSecond / r1.ReviewsPerSecond + fmt.Fprintf(w, "Performance: %s is %.2fx faster than %s\n", + strings.ToUpper(string(r2.Engine)), ratio, strings.ToUpper(string(r1.Engine))) + default: + fmt.Fprintln(w, "Performance: Both engines have similar throughput") + } +} + +// formatDuration formats a duration in a human-readable way. +func formatDuration(d time.Duration) string { + if d < time.Microsecond { + return fmt.Sprintf("%dns", d.Nanoseconds()) + } + if d < time.Millisecond { + return fmt.Sprintf("%.2fµs", float64(d.Nanoseconds())/1000) + } + if d < time.Second { + return fmt.Sprintf("%.2fms", float64(d.Nanoseconds())/1000000) + } + return fmt.Sprintf("%.3fs", d.Seconds()) +} + +// formatBytes formats bytes in a human-readable way. +func formatBytes(b uint64) string { + const ( + KB = 1024 + MB = KB * 1024 + GB = MB * 1024 + ) + switch { + case b >= GB: + return fmt.Sprintf("%.2f GB", float64(b)/GB) + case b >= MB: + return fmt.Sprintf("%.2f MB", float64(b)/MB) + case b >= KB: + return fmt.Sprintf("%.2f KB", float64(b)/KB) + default: + return fmt.Sprintf("%d B", b) + } +} + +// JSONResults is a JSON/YAML-friendly version of Results with string durations. +type JSONResults struct { + Engine string `json:"engine" yaml:"engine"` + TemplateCount int `json:"templateCount" yaml:"templateCount"` + ConstraintCount int `json:"constraintCount" yaml:"constraintCount"` + ObjectCount int `json:"objectCount" yaml:"objectCount"` + Iterations int `json:"iterations" yaml:"iterations"` + Concurrency int `json:"concurrency,omitempty" yaml:"concurrency,omitempty"` + TotalReviews int `json:"totalReviews" yaml:"totalReviews"` + SetupDuration string `json:"setupDuration" yaml:"setupDuration"` + SetupBreakdown JSONSetupBreakdown `json:"setupBreakdown" yaml:"setupBreakdown"` + TotalDuration string `json:"totalDuration" yaml:"totalDuration"` + Latencies JSONLatency `json:"latencies" yaml:"latencies"` + ViolationCount int `json:"violationCount" yaml:"violationCount"` + ReviewsPerSecond float64 `json:"reviewsPerSecond" yaml:"reviewsPerSecond"` + MemoryStats *JSONMemoryStats `json:"memoryStats,omitempty" yaml:"memoryStats,omitempty"` + StatsEntries []JSONStatsEntry `json:"statsEntries,omitempty" yaml:"statsEntries,omitempty"` + SkippedTemplates []string `json:"skippedTemplates,omitempty" yaml:"skippedTemplates,omitempty"` + SkippedConstraints []string `json:"skippedConstraints,omitempty" yaml:"skippedConstraints,omitempty"` + SkippedDataObjects []string `json:"skippedDataObjects,omitempty" yaml:"skippedDataObjects,omitempty"` + ReferentialDataSupported bool `json:"referentialDataSupported" yaml:"referentialDataSupported"` +} + +// JSONSetupBreakdown is a JSON/YAML-friendly version of SetupBreakdown with string durations. 
+type JSONSetupBreakdown struct { + ClientCreation string `json:"clientCreation" yaml:"clientCreation"` + TemplateCompilation string `json:"templateCompilation" yaml:"templateCompilation"` + ConstraintLoading string `json:"constraintLoading" yaml:"constraintLoading"` + DataLoading string `json:"dataLoading" yaml:"dataLoading"` +} + +// JSONLatency is a JSON/YAML-friendly version of Latencies with string durations. +type JSONLatency struct { + Min string `json:"min" yaml:"min"` + Max string `json:"max" yaml:"max"` + Mean string `json:"mean" yaml:"mean"` + P50 string `json:"p50" yaml:"p50"` + P95 string `json:"p95" yaml:"p95"` + P99 string `json:"p99" yaml:"p99"` +} + +// JSONMemoryStats is a JSON/YAML-friendly version of MemoryStats. +type JSONMemoryStats struct { + AllocsPerReview uint64 `json:"allocsPerReview" yaml:"allocsPerReview"` + BytesPerReview string `json:"bytesPerReview" yaml:"bytesPerReview"` + TotalAllocs uint64 `json:"totalAllocs" yaml:"totalAllocs"` + TotalBytes string `json:"totalBytes" yaml:"totalBytes"` +} + +// JSONStatsEntry is a JSON/YAML-friendly version of StatsEntry. +type JSONStatsEntry struct { + Scope string `json:"scope" yaml:"scope"` + StatsFor string `json:"statsFor,omitempty" yaml:"statsFor,omitempty"` + Stats []JSONStat `json:"stats" yaml:"stats"` + Labels []JSONStatLabel `json:"labels,omitempty" yaml:"labels,omitempty"` +} + +// JSONStat is a JSON/YAML-friendly version of instrumentation.Stat. +type JSONStat struct { + Name string `json:"name" yaml:"name"` + Value interface{} `json:"value" yaml:"value"` + Source string `json:"source" yaml:"source"` +} + +// JSONStatLabel is a JSON/YAML-friendly version of instrumentation.Label. +type JSONStatLabel struct { + Name string `json:"name" yaml:"name"` + Value interface{} `json:"value" yaml:"value"` +} + +func toJSONResults(results []Results) []JSONResults { + jsonResults := make([]JSONResults, len(results)) + for i := range results { + r := &results[i] + jr := JSONResults{ + Engine: string(r.Engine), + TemplateCount: r.TemplateCount, + ConstraintCount: r.ConstraintCount, + ObjectCount: r.ObjectCount, + Iterations: r.Iterations, + Concurrency: r.Concurrency, + TotalReviews: r.Iterations * r.ObjectCount, + SetupDuration: r.SetupDuration.String(), + SetupBreakdown: JSONSetupBreakdown{ + ClientCreation: r.SetupBreakdown.ClientCreation.String(), + TemplateCompilation: r.SetupBreakdown.TemplateCompilation.String(), + ConstraintLoading: r.SetupBreakdown.ConstraintLoading.String(), + DataLoading: r.SetupBreakdown.DataLoading.String(), + }, + TotalDuration: r.TotalDuration.String(), + Latencies: JSONLatency{ + Min: r.Latencies.Min.String(), + Max: r.Latencies.Max.String(), + Mean: r.Latencies.Mean.String(), + P50: r.Latencies.P50.String(), + P95: r.Latencies.P95.String(), + P99: r.Latencies.P99.String(), + }, + ViolationCount: r.ViolationCount, + ReviewsPerSecond: r.ReviewsPerSecond, + SkippedTemplates: r.SkippedTemplates, + SkippedConstraints: r.SkippedConstraints, + SkippedDataObjects: r.SkippedDataObjects, + ReferentialDataSupported: r.ReferentialDataSupported, + } + + // Add memory stats if available + if r.MemoryStats != nil { + jr.MemoryStats = &JSONMemoryStats{ + AllocsPerReview: r.MemoryStats.AllocsPerReview, + BytesPerReview: formatBytes(r.MemoryStats.BytesPerReview), + TotalAllocs: r.MemoryStats.TotalAllocs, + TotalBytes: formatBytes(r.MemoryStats.TotalBytes), + } + } + + // Add stats entries if available + if len(r.StatsEntries) > 0 { + jr.StatsEntries = make([]JSONStatsEntry, 0, len(r.StatsEntries)) + for _, 
entry := range r.StatsEntries { + if entry == nil { + continue + } + jsonEntry := JSONStatsEntry{ + Scope: entry.Scope, + StatsFor: entry.StatsFor, + } + // Convert stats + for _, stat := range entry.Stats { + if stat == nil { + continue + } + jsonEntry.Stats = append(jsonEntry.Stats, JSONStat{ + Name: stat.Name, + Value: stat.Value, + Source: fmt.Sprintf("%s/%s", stat.Source.Type, stat.Source.Value), + }) + } + // Convert labels + for _, label := range entry.Labels { + if label == nil { + continue + } + jsonEntry.Labels = append(jsonEntry.Labels, JSONStatLabel{ + Name: label.Name, + Value: label.Value, + }) + } + jr.StatsEntries = append(jr.StatsEntries, jsonEntry) + } + } + + jsonResults[i] = jr + } + return jsonResults +} diff --git a/pkg/gator/bench/output_test.go b/pkg/gator/bench/output_test.go new file mode 100644 index 00000000000..5d77dd31333 --- /dev/null +++ b/pkg/gator/bench/output_test.go @@ -0,0 +1,771 @@ +package bench + +import ( + "bytes" + "strings" + "testing" + "time" +) + +func TestParseOutputFormat(t *testing.T) { + tests := []struct { + input string + want OutputFormat + wantErr bool + }{ + {"", OutputFormatTable, false}, + {"table", OutputFormatTable, false}, + {"TABLE", OutputFormatTable, false}, + {"json", OutputFormatJSON, false}, + {"JSON", OutputFormatJSON, false}, + {"yaml", OutputFormatYAML, false}, + {"YAML", OutputFormatYAML, false}, + {"invalid", "", true}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got, err := ParseOutputFormat(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("ParseOutputFormat(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("ParseOutputFormat(%q) = %v, want %v", tt.input, got, tt.want) + } + }) + } +} + +func TestFormatResults(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 3, + ObjectCount: 10, + Iterations: 100, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{ + Min: 500 * time.Microsecond, + Max: 5 * time.Millisecond, + Mean: 1 * time.Millisecond, + P50: 900 * time.Microsecond, + P95: 3 * time.Millisecond, + P99: 4 * time.Millisecond, + }, + ViolationCount: 50, + ReviewsPerSecond: 1000, + }, + } + + t.Run("table format", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for expected content + expectedStrings := []string{ + "REGO Engine", + "Templates:", + "Constraints:", + "Latency", + "Min:", + "P99:", + "Violations Found:", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing expected string %q", s) + } + } + }) + + t.Run("json format", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatJSON) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for expected JSON keys + expectedStrings := []string{ + `"engine": "rego"`, + `"templateCount": 2`, + `"constraintCount": 3`, + `"latencies"`, + `"min"`, + `"p99"`, + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("json output missing expected string %q", s) + } + } + }) + + t.Run("yaml format", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatYAML) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for expected YAML keys + expectedStrings := []string{ + "engine: rego", + "templateCount: 
2", + "constraintCount: 3", + "latencies:", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("yaml output missing expected string %q", s) + } + } + }) +} + +func TestFormatDuration(t *testing.T) { + tests := []struct { + d time.Duration + want string + }{ + {500 * time.Nanosecond, "500ns"}, + {1500 * time.Nanosecond, "1.50µs"}, + {500 * time.Microsecond, "500.00µs"}, + {1500 * time.Microsecond, "1.50ms"}, + {500 * time.Millisecond, "500.00ms"}, + {1500 * time.Millisecond, "1.500s"}, + {2 * time.Second, "2.000s"}, + } + + for _, tt := range tests { + t.Run(tt.d.String(), func(t *testing.T) { + got := formatDuration(tt.d) + if got != tt.want { + t.Errorf("formatDuration(%v) = %q, want %q", tt.d, got, tt.want) + } + }) + } +} + +func TestFormatResults_SetupBreakdown(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 100 * time.Millisecond, + SetupBreakdown: SetupBreakdown{ + ClientCreation: 10 * time.Millisecond, + TemplateCompilation: 50 * time.Millisecond, + ConstraintLoading: 30 * time.Millisecond, + DataLoading: 10 * time.Millisecond, + }, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for setup breakdown content + expectedStrings := []string{ + "Client Creation:", + "Template Compilation:", + "Constraint Loading:", + "Data Loading:", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing setup breakdown: %q", s) + } + } +} + +func TestFormatResults_SkippedTemplates(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + SkippedTemplates: []string{"template1", "template2"}, + SkippedConstraints: []string{"constraint1"}, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for warnings section + expectedStrings := []string{ + "Warnings:", + "Skipped Templates:", + "template1", + "template2", + "Skipped Constraints:", + "constraint1", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing skipped warning: %q", s) + } + } +} + +func TestFormatResults_SkippedDataObjects(t *testing.T) { + // Test skipped data objects - this tests actual failures during data loading, + // not CEL engine limitations (which use ReferentialDataSupported flag) + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 5, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 50, + ReferentialDataSupported: true, + SkippedDataObjects: []string{"default/pod1", "default/pod2", "kube-system/configmap1"}, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != 
nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for warnings section with skipped data objects + expectedStrings := []string{ + "Warnings:", + "Skipped Data Objects:", + "failed to load as referential data", + "default/pod1", + "default/pod2", + "kube-system/configmap1", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing skipped data warning: %q\nOutput:\n%s", s, output) + } + } +} + +func TestFormatResults_SkippedDataObjectsTruncated(t *testing.T) { + // Test with more than 5 objects to verify truncation + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 10, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 100, + ReferentialDataSupported: true, + SkippedDataObjects: []string{ + "obj1", "obj2", "obj3", "obj4", "obj5", "obj6", "obj7", + }, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should show truncation message + expectedStrings := []string{ + "Skipped Data Objects:", + "and 2 more", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing truncation message: %q\nOutput:\n%s", s, output) + } + } +} + +func TestFormatResults_ReferentialDataNotSupported(t *testing.T) { + // Test that CEL engine shows informational note (not warning) about referential data + results := []Results{ + { + Engine: EngineCEL, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 5, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 50, + ReferentialDataSupported: false, // CEL doesn't support referential data + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should show informational note, not warning + expectedStrings := []string{ + "Note:", + "Referential Data:", + "Not supported by", + "CEL", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing referential data note: %q\nOutput:\n%s", s, output) + } + } + + // Should NOT show "Warnings:" for referential data (that's for actual failures) + if strings.Contains(output, "Warnings:") { + t.Errorf("table output should not show Warnings for CEL referential data limitation\nOutput:\n%s", output) + } +} + +func TestFormatResults_ComparisonTable(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 10, + Iterations: 100, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: 2 * time.Millisecond, P99: 3 * time.Millisecond}, + ViolationCount: 10, + ReviewsPerSecond: 1000, + }, + { + Engine: EngineCEL, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 10, + Iterations: 100, + SetupDuration: 30 * time.Millisecond, + TotalDuration: 500 * time.Millisecond, + Latencies: Latencies{Mean: 500 * time.Microsecond, P95: time.Millisecond, P99: 2 * time.Millisecond}, + ViolationCount: 10, + ReviewsPerSecond: 2000, + }, + } + + output, err := 
FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for comparison table content + expectedStrings := []string{ + "Engine Comparison", + "Metric", + "REGO", + "CEL", + "Throughput", + "Mean Latency", + "P95 Latency", + "P99 Latency", + "Performance:", // Performance comparison line + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing comparison content: %q", s) + } + } +} + +func TestFormatResults_SetupBreakdownJSON(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 100 * time.Millisecond, + SetupBreakdown: SetupBreakdown{ + ClientCreation: 10 * time.Millisecond, + TemplateCompilation: 50 * time.Millisecond, + ConstraintLoading: 30 * time.Millisecond, + DataLoading: 10 * time.Millisecond, + }, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + }, + } + + output, err := FormatResults(results, OutputFormatJSON) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for setup breakdown in JSON + expectedStrings := []string{ + `"setupBreakdown"`, + `"clientCreation"`, + `"templateCompilation"`, + `"constraintLoading"`, + `"dataLoading"`, + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("json output missing setup breakdown: %q", s) + } + } +} + +func TestFormatResults_SkippedInJSON(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + SkippedTemplates: []string{"skipped-template"}, + SkippedConstraints: []string{"skipped-constraint"}, + }, + } + + output, err := FormatResults(results, OutputFormatJSON) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for skipped items in JSON + expectedStrings := []string{ + `"skippedTemplates"`, + `"skipped-template"`, + `"skippedConstraints"`, + `"skipped-constraint"`, + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("json output missing skipped items: %q", s) + } + } +} + +func TestFormatResults_EqualThroughput(t *testing.T) { + // Test the case where both engines have identical throughput + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, // Same throughput + }, + { + Engine: EngineCEL, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, // Same throughput + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should contain the "similar throughput" message + if 
!strings.Contains(output, "similar throughput") { + t.Error("expected 'similar throughput' message for equal performance") + } +} + +func TestFormatResults_ZeroThroughput(t *testing.T) { + // Test the case where one engine has zero throughput + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 0, // Zero throughput + }, + { + Engine: EngineCEL, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should NOT contain a performance comparison when one has zero throughput + if strings.Contains(output, "faster than") { + t.Error("should not show performance comparison when throughput is zero") + } +} + +func TestFormatResults_RegoFasterThanCEL(t *testing.T) { + // Test case where Rego is faster than CEL (reversed from normal) + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 2000, // Rego faster + }, + { + Engine: EngineCEL, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should show REGO is faster + if !strings.Contains(output, "REGO is") || !strings.Contains(output, "faster than CEL") { + t.Error("expected performance comparison showing REGO faster than CEL") + } +} + +func TestWritePerfDiff_NegativeThroughput(t *testing.T) { + var buf bytes.Buffer + r1 := &Results{Engine: EngineRego, ReviewsPerSecond: -1} + r2 := &Results{Engine: EngineCEL, ReviewsPerSecond: 1000} + + writePerfDiff(&buf, r1, r2) + + // Should not output anything when throughput is negative + if buf.String() != "" { + t.Error("expected no output for negative throughput") + } +} + +func TestFormatBytes(t *testing.T) { + tests := []struct { + bytes uint64 + want string + }{ + {0, "0 B"}, + {512, "512 B"}, + {1024, "1.00 KB"}, + {1536, "1.50 KB"}, + {1048576, "1.00 MB"}, + {1572864, "1.50 MB"}, + {1073741824, "1.00 GB"}, + } + + for _, tt := range tests { + t.Run(tt.want, func(t *testing.T) { + got := formatBytes(tt.bytes) + if got != tt.want { + t.Errorf("formatBytes(%d) = %q, want %q", tt.bytes, got, tt.want) + } + }) + } +} + +func TestFormatResults_WithMemoryStats(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, 
Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + MemoryStats: &MemoryStats{ + AllocsPerReview: 500, + BytesPerReview: 10240, + TotalAllocs: 5000, + TotalBytes: 102400, + }, + }, + } + + t.Run("table format with memory", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + expectedStrings := []string{ + "Memory:", + "Allocs/Review:", + "500", + "Bytes/Review:", + "10.00 KB", + "Total Allocs:", + "Total Bytes:", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing memory stat: %q", s) + } + } + }) + + t.Run("json format with memory", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatJSON) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + expectedStrings := []string{ + `"memoryStats"`, + `"allocsPerReview": 500`, + `"bytesPerReview": "10.00 KB"`, + `"totalAllocs": 5000`, + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("json output missing memory stat: %q", s) + } + } + }) +} + +func TestFormatResults_ComparisonTableWithMemory(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, + MemoryStats: &MemoryStats{ + AllocsPerReview: 500, + BytesPerReview: 10240, + }, + }, + { + Engine: EngineCEL, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 2000, + MemoryStats: &MemoryStats{ + AllocsPerReview: 200, + BytesPerReview: 4096, + }, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for memory in comparison table + expectedStrings := []string{ + "Allocs/Review", + "Bytes/Review", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("comparison table missing memory row: %q", s) + } + } +} diff --git a/pkg/gator/bench/types.go b/pkg/gator/bench/types.go new file mode 100644 index 00000000000..8527216e3e1 --- /dev/null +++ b/pkg/gator/bench/types.go @@ -0,0 +1,219 @@ +package bench + +import ( + "io" + "time" + + "github.com/open-policy-agent/frameworks/constraint/pkg/instrumentation" +) + +// Engine represents the policy evaluation engine to benchmark. +type Engine string + +const ( + // EngineRego benchmarks the Rego/OPA policy engine. + EngineRego Engine = "rego" + // EngineCEL benchmarks the Kubernetes CEL policy engine. + EngineCEL Engine = "cel" + // EngineAll benchmarks both Rego and CEL engines. + EngineAll Engine = "all" +) + +// Opts configures the benchmark run. +type Opts struct { + // Filenames are the paths to files or directories containing + // ConstraintTemplates, Constraints, and objects to review. + Filenames []string + + // Images are OCI image URLs containing policies. + Images []string + + // TempDir is the directory for unpacking OCI images. + TempDir string + + // Engine specifies which policy engine(s) to benchmark. 
+ Engine Engine + + // Iterations is the number of review cycles to run. + Iterations int + + // Warmup is the number of warmup iterations before measurement. + Warmup int + + // GatherStats enables collection of per-constraint statistics + // from the constraint framework. + GatherStats bool + + // Memory enables memory profiling during benchmark. + Memory bool + + // Baseline is the path to a baseline results file for comparison. + Baseline string + + // Save is the path to save benchmark results for future comparison. + Save string + + // Threshold is the regression threshold percentage for comparison. + // If a metric regresses more than this percentage, the benchmark fails. + Threshold float64 + + // MinThreshold is the minimum absolute latency difference (in duration) to consider + // a regression. This prevents false positives on very fast policies where small + // absolute changes appear as large percentage changes. + MinThreshold time.Duration + + // Concurrency is the number of concurrent goroutines to use for reviews. + // Default is 1 (sequential). Higher values simulate realistic webhook load. + Concurrency int + + // Writer is where warnings and informational messages are written. + // If nil, warnings are not printed. + Writer io.Writer +} + +// Results contains benchmark metrics for a single engine. +type Results struct { + // Engine is the policy engine that was benchmarked. + Engine Engine `json:"engine" yaml:"engine"` + + // TemplateCount is the number of ConstraintTemplates loaded. + TemplateCount int `json:"templateCount" yaml:"templateCount"` + + // SkippedTemplates contains names of templates skipped due to engine incompatibility. + SkippedTemplates []string `json:"skippedTemplates,omitempty" yaml:"skippedTemplates,omitempty"` + + // ConstraintCount is the number of Constraints loaded. + ConstraintCount int `json:"constraintCount" yaml:"constraintCount"` + + // SkippedConstraints contains names of constraints skipped due to missing templates. + SkippedConstraints []string `json:"skippedConstraints,omitempty" yaml:"skippedConstraints,omitempty"` + + // SkippedDataObjects contains names of objects that failed to load as referential data. + // This is populated only when actual errors occur during data loading, not for expected + // engine limitations (use ReferentialDataSupported for that). + SkippedDataObjects []string `json:"skippedDataObjects,omitempty" yaml:"skippedDataObjects,omitempty"` + + // ReferentialDataSupported indicates whether the engine supports referential data. + // When false, referential constraints cannot be exercised (e.g., CEL engine). + ReferentialDataSupported bool `json:"referentialDataSupported" yaml:"referentialDataSupported"` + + // ObjectCount is the number of objects reviewed. + ObjectCount int `json:"objectCount" yaml:"objectCount"` + + // Iterations is the number of review cycles run. + Iterations int `json:"iterations" yaml:"iterations"` + + // Concurrency is the number of concurrent goroutines used. + Concurrency int `json:"concurrency" yaml:"concurrency"` + + // SetupDuration is the total time taken to load templates, constraints, and data. + SetupDuration time.Duration `json:"setupDuration" yaml:"setupDuration"` + + // SetupBreakdown contains detailed timing for each setup phase. + SetupBreakdown SetupBreakdown `json:"setupBreakdown" yaml:"setupBreakdown"` + + // TotalDuration is the total time for all review iterations. 
+ TotalDuration time.Duration `json:"totalDuration" yaml:"totalDuration"` + + // Latencies contains timing for each review operation. + Latencies Latencies `json:"latencies" yaml:"latencies"` + + // ViolationCount is the total number of violations found. + ViolationCount int `json:"violationCount" yaml:"violationCount"` + + // ReviewsPerSecond is the throughput metric (reviews/second). + ReviewsPerSecond float64 `json:"reviewsPerSecond" yaml:"reviewsPerSecond"` + + // MemoryStats contains memory allocation statistics (only populated with --memory). + MemoryStats *MemoryStats `json:"memoryStats,omitempty" yaml:"memoryStats,omitempty"` + + // StatsEntries contains per-constraint statistics from the policy engine (only populated with --stats). + StatsEntries []*instrumentation.StatsEntry `json:"statsEntries,omitempty" yaml:"statsEntries,omitempty"` +} + +// SetupBreakdown contains detailed timing for setup phases. +type SetupBreakdown struct { + // ClientCreation is the time to create the constraint client. + ClientCreation time.Duration `json:"clientCreation" yaml:"clientCreation"` + + // TemplateCompilation is the time to compile all templates. + TemplateCompilation time.Duration `json:"templateCompilation" yaml:"templateCompilation"` + + // ConstraintLoading is the time to load all constraints. + ConstraintLoading time.Duration `json:"constraintLoading" yaml:"constraintLoading"` + + // DataLoading is the time to load reference data. + DataLoading time.Duration `json:"dataLoading" yaml:"dataLoading"` +} + +// Latencies contains latency statistics. +type Latencies struct { + // Min is the minimum latency observed. + Min time.Duration `json:"min" yaml:"min"` + + // Max is the maximum latency observed. + Max time.Duration `json:"max" yaml:"max"` + + // Mean is the average latency. + Mean time.Duration `json:"mean" yaml:"mean"` + + // P50 is the 50th percentile (median) latency. + P50 time.Duration `json:"p50" yaml:"p50"` + + // P95 is the 95th percentile latency. + P95 time.Duration `json:"p95" yaml:"p95"` + + // P99 is the 99th percentile latency. + P99 time.Duration `json:"p99" yaml:"p99"` +} + +// MemoryStats contains memory allocation statistics from benchmark runs. +type MemoryStats struct { + // AllocsPerReview is the average number of allocations per review. + AllocsPerReview uint64 `json:"allocsPerReview" yaml:"allocsPerReview"` + + // BytesPerReview is the average bytes allocated per review. + BytesPerReview uint64 `json:"bytesPerReview" yaml:"bytesPerReview"` + + // TotalAllocs is the total number of allocations during measurement. + TotalAllocs uint64 `json:"totalAllocs" yaml:"totalAllocs"` + + // TotalBytes is the total bytes allocated during measurement. + TotalBytes uint64 `json:"totalBytes" yaml:"totalBytes"` +} + +// ComparisonResult contains the result of comparing current results against a baseline. +type ComparisonResult struct { + // BaselineEngine is the engine from the baseline. + BaselineEngine Engine `json:"baselineEngine" yaml:"baselineEngine"` + + // CurrentEngine is the engine from the current run. + CurrentEngine Engine `json:"currentEngine" yaml:"currentEngine"` + + // Metrics contains the comparison for each metric. + Metrics []MetricComparison `json:"metrics" yaml:"metrics"` + + // Passed indicates whether all metrics are within threshold. + Passed bool `json:"passed" yaml:"passed"` + + // FailedMetrics contains names of metrics that exceeded threshold. 
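+ // It is empty when Passed is true.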
+ FailedMetrics []string `json:"failedMetrics,omitempty" yaml:"failedMetrics,omitempty"` +} + +// MetricComparison contains comparison data for a single metric. +type MetricComparison struct { + // Name is the metric name. + Name string `json:"name" yaml:"name"` + + // Baseline is the baseline value. + Baseline float64 `json:"baseline" yaml:"baseline"` + + // Current is the current value. + Current float64 `json:"current" yaml:"current"` + + // Delta is the percentage change (positive = regression for latency, negative = improvement). + Delta float64 `json:"delta" yaml:"delta"` + + // Passed indicates whether this metric is within threshold. + Passed bool `json:"passed" yaml:"passed"` +} diff --git a/pkg/gator/fileext.go b/pkg/gator/fileext.go new file mode 100644 index 00000000000..b703ba1d7df --- /dev/null +++ b/pkg/gator/fileext.go @@ -0,0 +1,21 @@ +package gator + +// File extension constants for supported file formats. +const ( + // ExtYAML is the standard YAML file extension. + ExtYAML = ".yaml" + // ExtYML is the alternative YAML file extension. + ExtYML = ".yml" + // ExtJSON is the JSON file extension. + ExtJSON = ".json" +) + +// IsYAMLExtension returns true if the extension is a valid YAML extension. +func IsYAMLExtension(ext string) bool { + return ext == ExtYAML || ext == ExtYML +} + +// IsSupportedExtension returns true if the extension is supported (YAML or JSON). +func IsSupportedExtension(ext string) bool { + return ext == ExtYAML || ext == ExtYML || ext == ExtJSON +} diff --git a/pkg/gator/fileext_test.go b/pkg/gator/fileext_test.go new file mode 100644 index 00000000000..7e07fdb8deb --- /dev/null +++ b/pkg/gator/fileext_test.go @@ -0,0 +1,45 @@ +package gator + +import "testing" + +func TestIsYAMLExtension(t *testing.T) { + tests := []struct { + ext string + expected bool + }{ + {ExtYAML, true}, + {ExtYML, true}, + {ExtJSON, false}, + {".txt", false}, + {"", false}, + } + + for _, tt := range tests { + t.Run(tt.ext, func(t *testing.T) { + if got := IsYAMLExtension(tt.ext); got != tt.expected { + t.Errorf("IsYAMLExtension(%q) = %v, want %v", tt.ext, got, tt.expected) + } + }) + } +} + +func TestIsSupportedExtension(t *testing.T) { + tests := []struct { + ext string + expected bool + }{ + {ExtYAML, true}, + {ExtYML, true}, + {ExtJSON, true}, + {".txt", false}, + {"", false}, + } + + for _, tt := range tests { + t.Run(tt.ext, func(t *testing.T) { + if got := IsSupportedExtension(tt.ext); got != tt.expected { + t.Errorf("IsSupportedExtension(%q) = %v, want %v", tt.ext, got, tt.expected) + } + }) + } +} diff --git a/pkg/gator/reader/filereader.go b/pkg/gator/reader/filereader.go index 6c1f1a7d78e..3d07262a693 100644 --- a/pkg/gator/reader/filereader.go +++ b/pkg/gator/reader/filereader.go @@ -6,11 +6,12 @@ import ( "os" "path/filepath" + "github.com/open-policy-agent/gatekeeper/v3/pkg/gator" "github.com/open-policy-agent/gatekeeper/v3/pkg/oci" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) -var allowedExtensions = []string{".yaml", ".yml", ".json"} +var allowedExtensions = []string{gator.ExtYAML, gator.ExtYML, gator.ExtJSON} func ReadSources(filenames []string, images []string, tempDir string) ([]*unstructured.Unstructured, error) { var sources []*source diff --git a/pkg/gator/verify/read_suites.go b/pkg/gator/verify/read_suites.go index ce72e3578b8..3d939ef0bcc 100644 --- a/pkg/gator/verify/read_suites.go +++ b/pkg/gator/verify/read_suites.go @@ -135,7 +135,7 @@ type fileList []string func (l *fileList) addFile(target string) error { // target is a file. 
ext := path.Ext(target) - if ext != ".yaml" && ext != ".yml" { + if !gator.IsYAMLExtension(ext) { return fmt.Errorf("%w: %q", ErrUnsupportedExtension, ext) } *l = append(*l, target) @@ -172,7 +172,7 @@ func isYAMLFile(d fs.DirEntry) bool { return false } ext := path.Ext(d.Name()) - return ext == ".yaml" || ext == ".yml" + return gator.IsYAMLExtension(ext) } func readSuite(f fs.FS, path string) (*Suite, error) { diff --git a/test/gator/bench/basic/constraint.yaml b/test/gator/bench/basic/constraint.yaml new file mode 100644 index 00000000000..d845b242643 --- /dev/null +++ b/test/gator/bench/basic/constraint.yaml @@ -0,0 +1,11 @@ +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] + parameters: + labels: ["team"] diff --git a/test/gator/bench/basic/resources.yaml b/test/gator/bench/basic/resources.yaml new file mode 100644 index 00000000000..3fd85fbbb11 --- /dev/null +++ b/test/gator/bench/basic/resources.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: valid-pod + labels: + team: platform +spec: + containers: + - name: nginx + image: nginx +--- +apiVersion: v1 +kind: Pod +metadata: + name: invalid-pod +spec: + containers: + - name: nginx + image: nginx diff --git a/test/gator/bench/basic/template.yaml b/test/gator/bench/basic/template.yaml new file mode 100644 index 00000000000..fe36b5a67de --- /dev/null +++ b/test/gator/bench/basic/template.yaml @@ -0,0 +1,28 @@ +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + validation: + openAPIV3Schema: + type: object + properties: + labels: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + provided := {label | input.review.object.metadata.labels[label]} + required := {label | label := input.parameters.labels[_]} + missing := required - provided + count(missing) > 0 + msg := sprintf("Missing required labels: %v", [missing]) + } diff --git a/test/gator/bench/both/constraint.yaml b/test/gator/bench/both/constraint.yaml new file mode 100644 index 00000000000..c331ee3c4a4 --- /dev/null +++ b/test/gator/bench/both/constraint.yaml @@ -0,0 +1,13 @@ +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sAllowedRepos +metadata: + name: allowed-repos +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] + parameters: + repos: + - "gcr.io/myproject/" + - "docker.io/library/" diff --git a/test/gator/bench/both/resources.yaml b/test/gator/bench/both/resources.yaml new file mode 100644 index 00000000000..f4112c7eca1 --- /dev/null +++ b/test/gator/bench/both/resources.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: allowed-image + namespace: default +spec: + containers: + - name: app + image: gcr.io/myproject/myapp:v1.0 +--- +apiVersion: v1 +kind: Pod +metadata: + name: disallowed-image + namespace: default +spec: + containers: + - name: app + image: quay.io/unauthorized/app:latest diff --git a/test/gator/bench/both/template.yaml b/test/gator/bench/both/template.yaml new file mode 100644 index 00000000000..55708544651 --- /dev/null +++ b/test/gator/bench/both/template.yaml @@ -0,0 +1,44 @@ +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8sallowedrepos +spec: + crd: + spec: + names: + kind: K8sAllowedRepos + validation: + openAPIV3Schema: + type: 
object + properties: + repos: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8sallowedrepos + + violation[{"msg": msg}] { + container := input.review.object.spec.containers[_] + not strings.any_prefix_match(container.image, input.parameters.repos) + msg := sprintf("container <%v> has an invalid image repo <%v>, allowed repos are %v", [container.name, container.image, input.parameters.repos]) + } + + violation[{"msg": msg}] { + container := input.review.object.spec.initContainers[_] + not strings.any_prefix_match(container.image, input.parameters.repos) + msg := sprintf("initContainer <%v> has an invalid image repo <%v>, allowed repos are %v", [container.name, container.image, input.parameters.repos]) + } + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "object.spec.containers.all(c, variables.repos.exists(repo, c.image.startsWith(repo)))" + messageExpression: "'container ' + variables.failedContainer + ' has an invalid image repo, allowed repos are ' + variables.repos.join(', ')" + variables: + - name: repos + expression: "has(variables.params.repos) ? variables.params.repos : []" + - name: failedContainer + expression: "object.spec.containers.filter(c, !variables.repos.exists(repo, c.image.startsWith(repo))).map(c, c.name).join(', ')" diff --git a/test/gator/bench/cel/constraint.yaml b/test/gator/bench/cel/constraint.yaml new file mode 100644 index 00000000000..3704bfa3b08 --- /dev/null +++ b/test/gator/bench/cel/constraint.yaml @@ -0,0 +1,9 @@ +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sContainerLimits +metadata: + name: require-limits +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] diff --git a/test/gator/bench/cel/resources.yaml b/test/gator/bench/cel/resources.yaml new file mode 100644 index 00000000000..12637bb8483 --- /dev/null +++ b/test/gator/bench/cel/resources.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: Pod +metadata: + name: pod-with-limits + namespace: default +spec: + containers: + - name: nginx + image: nginx:latest + resources: + limits: + cpu: "500m" + memory: "128Mi" + requests: + cpu: "250m" + memory: "64Mi" +--- +apiVersion: v1 +kind: Pod +metadata: + name: pod-without-limits + namespace: default +spec: + containers: + - name: nginx + image: nginx:latest diff --git a/test/gator/bench/cel/template.yaml b/test/gator/bench/cel/template.yaml new file mode 100644 index 00000000000..d37ef8e9216 --- /dev/null +++ b/test/gator/bench/cel/template.yaml @@ -0,0 +1,17 @@ +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8scontainerlimits +spec: + crd: + spec: + names: + kind: K8sContainerLimits + targets: + - target: admission.k8s.gatekeeper.sh + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "has(object.spec.containers) && object.spec.containers.all(c, has(c.resources) && has(c.resources.limits))" + message: "All containers must have resource limits" diff --git a/test/gator/bench/scripts/analyze-data.sh b/test/gator/bench/scripts/analyze-data.sh new file mode 100755 index 00000000000..1ad8a1ea25a --- /dev/null +++ b/test/gator/bench/scripts/analyze-data.sh @@ -0,0 +1,187 @@ +#!/bin/bash +# Analysis script for gator bench data + +OUTPUT_DIR="/tmp/gator-bench-data" + +if [ ! -d "$OUTPUT_DIR" ]; then + echo "Error: No data found. Run gather-data.sh first." 
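+ # Nothing to analyze until gather-data.sh has written its JSON results to $OUTPUT_DIR.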
+ exit 1 +fi + +echo "=== Gator Bench Data Analysis ===" +echo "" + +############################################################################### +# Test 1: CEL vs Rego Comparison +############################################################################### +echo "=== Test 1: CEL vs Rego Comparison ===" +echo "" + +if [ -f "$OUTPUT_DIR/test1_rego.json" ] && [ -f "$OUTPUT_DIR/test1_cel.json" ]; then + REGO_THROUGHPUT=$(jq -r '.[0].reviewsPerSecond' "$OUTPUT_DIR/test1_rego.json") + CEL_THROUGHPUT=$(jq -r '.[0].reviewsPerSecond' "$OUTPUT_DIR/test1_cel.json") + + REGO_MEAN=$(jq -r '.[0].latencies.mean' "$OUTPUT_DIR/test1_rego.json") + CEL_MEAN=$(jq -r '.[0].latencies.mean' "$OUTPUT_DIR/test1_cel.json") + + REGO_P99=$(jq -r '.[0].latencies.p99' "$OUTPUT_DIR/test1_rego.json") + CEL_P99=$(jq -r '.[0].latencies.p99' "$OUTPUT_DIR/test1_cel.json") + + REGO_SETUP=$(jq -r '.[0].setupDuration' "$OUTPUT_DIR/test1_rego.json") + CEL_SETUP=$(jq -r '.[0].setupDuration' "$OUTPUT_DIR/test1_cel.json") + + echo "Metric Rego CEL Ratio (CEL/Rego)" + echo "------ ---- --- ----------------" + printf "Throughput %-17.2f %-17.2f %.2fx\n" "$REGO_THROUGHPUT" "$CEL_THROUGHPUT" "$(echo "scale=2; $CEL_THROUGHPUT / $REGO_THROUGHPUT" | bc)" + printf "Mean Latency (ns) %-17.0f %-17.0f %.2fx\n" "$REGO_MEAN" "$CEL_MEAN" "$(echo "scale=2; $REGO_MEAN / $CEL_MEAN" | bc)" + printf "P99 Latency (ns) %-17.0f %-17.0f %.2fx\n" "$REGO_P99" "$CEL_P99" "$(echo "scale=2; $REGO_P99 / $CEL_P99" | bc)" + printf "Setup Time (ns) %-17.0f %-17.0f %.2fx\n" "$REGO_SETUP" "$CEL_SETUP" "$(echo "scale=2; $REGO_SETUP / $CEL_SETUP" | bc)" + echo "" +fi + +############################################################################### +# Test 2: Concurrency Scaling +############################################################################### +echo "=== Test 2: Concurrency Scaling ===" +echo "" + +echo "Concurrency Throughput P99 Latency Efficiency" +echo "----------- ---------- ----------- ----------" + +BASELINE_THROUGHPUT="" +for CONC in 1 2 4 8 16; do + FILE="$OUTPUT_DIR/test2_conc_${CONC}.json" + if [ -f "$FILE" ]; then + THROUGHPUT=$(jq -r '.[0].reviewsPerSecond' "$FILE") + P99=$(jq -r '.[0].latencies.p99' "$FILE") + + if [ -z "$BASELINE_THROUGHPUT" ]; then + BASELINE_THROUGHPUT=$THROUGHPUT + EFFICIENCY="100%" + else + # Expected linear scaling + EXPECTED=$(echo "scale=2; $BASELINE_THROUGHPUT * $CONC" | bc) + EFF=$(echo "scale=0; ($THROUGHPUT / $EXPECTED) * 100" | bc) + EFFICIENCY="${EFF}%" + fi + + P99_MS=$(echo "scale=3; $P99 / 1000000" | bc) + printf "%-12d %-14.2f %-14.3fms %s\n" "$CONC" "$THROUGHPUT" "$P99_MS" "$EFFICIENCY" + fi +done +echo "" + +############################################################################### +# Test 3: P99 Stability +############################################################################### +echo "=== Test 3: P99 Stability vs Iteration Count ===" +echo "" + +echo "Iterations P50 (µs) P95 (µs) P99 (µs) Mean (µs)" +echo "---------- -------- -------- -------- ---------" + +for ITER in 50 100 500 1000 5000; do + FILE="$OUTPUT_DIR/test3_iter_${ITER}.json" + if [ -f "$FILE" ]; then + P50=$(jq -r '.[0].latencies.p50' "$FILE") + P95=$(jq -r '.[0].latencies.p95' "$FILE") + P99=$(jq -r '.[0].latencies.p99' "$FILE") + MEAN=$(jq -r '.[0].latencies.mean' "$FILE") + + P50_US=$(echo "scale=2; $P50 / 1000" | bc) + P95_US=$(echo "scale=2; $P95 / 1000" | bc) + P99_US=$(echo "scale=2; $P99 / 1000" | bc) + MEAN_US=$(echo "scale=2; $MEAN / 1000" | bc) + + printf "%-12d %-11.2f %-11.2f %-11.2f %.2f\n" "$ITER" 
"$P50_US" "$P95_US" "$P99_US" "$MEAN_US" + fi +done +echo "" + +############################################################################### +# Test 4: Memory Comparison +############################################################################### +echo "=== Test 4: Memory Profiling ===" +echo "" + +if [ -f "$OUTPUT_DIR/test4_rego_memory.json" ] && [ -f "$OUTPUT_DIR/test4_cel_memory.json" ]; then + REGO_ALLOCS=$(jq -r '.[0].memoryStats.allocsPerReview // "N/A"' "$OUTPUT_DIR/test4_rego_memory.json") + CEL_ALLOCS=$(jq -r '.[0].memoryStats.allocsPerReview // "N/A"' "$OUTPUT_DIR/test4_cel_memory.json") + + REGO_BYTES=$(jq -r '.[0].memoryStats.bytesPerReview // "N/A"' "$OUTPUT_DIR/test4_rego_memory.json") + CEL_BYTES=$(jq -r '.[0].memoryStats.bytesPerReview // "N/A"' "$OUTPUT_DIR/test4_cel_memory.json") + + echo "Metric Rego CEL" + echo "------ ---- ---" + printf "Allocs/Review %-17s %s\n" "$REGO_ALLOCS" "$CEL_ALLOCS" + printf "Bytes/Review %-17s %s\n" "$REGO_BYTES" "$CEL_BYTES" + echo "" +fi + +############################################################################### +# Test 5: Warmup Impact +############################################################################### +echo "=== Test 5: Warmup Impact ===" +echo "" + +echo "Warmup Mean (µs) P99 (µs)" +echo "------ --------- --------" + +for WARMUP in 0 5 10 50 100; do + FILE="$OUTPUT_DIR/test5_warmup_${WARMUP}.json" + if [ -f "$FILE" ]; then + MEAN=$(jq -r '.[0].latencies.mean' "$FILE") + P99=$(jq -r '.[0].latencies.p99' "$FILE") + + MEAN_US=$(echo "scale=2; $MEAN / 1000" | bc) + P99_US=$(echo "scale=2; $P99 / 1000" | bc) + + printf "%-12d %-11.2f %.2f\n" "$WARMUP" "$MEAN_US" "$P99_US" + fi +done +echo "" + +############################################################################### +# Test 6: Variance Analysis +############################################################################### +echo "=== Test 6: Variance Analysis ===" +echo "" + +echo "Run Throughput Mean (µs) P99 (µs)" +echo "--- ---------- --------- --------" + +SUM_THROUGHPUT=0 +SUM_MEAN=0 +SUM_P99=0 +COUNT=0 + +for RUN in 1 2 3 4 5; do + FILE="$OUTPUT_DIR/test6_run_${RUN}.json" + if [ -f "$FILE" ]; then + THROUGHPUT=$(jq -r '.[0].reviewsPerSecond' "$FILE") + MEAN=$(jq -r '.[0].latencies.mean' "$FILE") + P99=$(jq -r '.[0].latencies.p99' "$FILE") + + MEAN_US=$(echo "scale=2; $MEAN / 1000" | bc) + P99_US=$(echo "scale=2; $P99 / 1000" | bc) + + printf "%-5d %-14.2f %-12.2f %.2f\n" "$RUN" "$THROUGHPUT" "$MEAN_US" "$P99_US" + + SUM_THROUGHPUT=$(echo "$SUM_THROUGHPUT + $THROUGHPUT" | bc) + SUM_MEAN=$(echo "$SUM_MEAN + $MEAN_US" | bc) + SUM_P99=$(echo "$SUM_P99 + $P99_US" | bc) + COUNT=$((COUNT + 1)) + fi +done + +if [ $COUNT -gt 0 ]; then + AVG_THROUGHPUT=$(echo "scale=2; $SUM_THROUGHPUT / $COUNT" | bc) + AVG_MEAN=$(echo "scale=2; $SUM_MEAN / $COUNT" | bc) + AVG_P99=$(echo "scale=2; $SUM_P99 / $COUNT" | bc) + + echo "--- ---------- --------- --------" + printf "AVG %-14.2f %-12.2f %.2f\n" "$AVG_THROUGHPUT" "$AVG_MEAN" "$AVG_P99" +fi +echo "" + +echo "=== Analysis Complete ===" diff --git a/test/gator/bench/scripts/gather-data.sh b/test/gator/bench/scripts/gather-data.sh new file mode 100755 index 00000000000..66eb445f6d3 --- /dev/null +++ b/test/gator/bench/scripts/gather-data.sh @@ -0,0 +1,144 @@ +#!/bin/bash +# Performance data gathering script for gator bench +# This script collects data to understand performance characteristics + +set -e + +GATOR="./bin/gator" +OUTPUT_DIR="/tmp/gator-bench-data" +ITERATIONS=1000 + +mkdir -p "$OUTPUT_DIR" + +echo "=== Gator 
Bench Data Collection ===" +echo "Output directory: $OUTPUT_DIR" +echo "Iterations per test: $ITERATIONS" +echo "" + +# Build gator first +echo "Building gator..." +make gator > /dev/null 2>&1 +echo "Done." +echo "" + +############################################################################### +# Test 1: CEL vs Rego - Same Policy (K8sAllowedRepos supports both) +############################################################################### +echo "=== Test 1: CEL vs Rego Comparison ===" + +echo "Running Rego engine..." +$GATOR bench \ + --filename test/gator/bench/both/ \ + --engine rego \ + --iterations $ITERATIONS \ + --output json > "$OUTPUT_DIR/test1_rego.json" + +echo "Running CEL engine..." +$GATOR bench \ + --filename test/gator/bench/both/ \ + --engine cel \ + --iterations $ITERATIONS \ + --output json > "$OUTPUT_DIR/test1_cel.json" + +echo "Results saved to test1_rego.json and test1_cel.json" +echo "" + +############################################################################### +# Test 2: Concurrency Scaling +############################################################################### +echo "=== Test 2: Concurrency Scaling ===" + +for CONC in 1 2 4 8 16; do + echo "Running with concurrency=$CONC..." + $GATOR bench \ + --filename test/gator/bench/basic/ \ + --iterations $ITERATIONS \ + --concurrency $CONC \ + --output json > "$OUTPUT_DIR/test2_conc_${CONC}.json" +done + +echo "Results saved to test2_conc_*.json" +echo "" + +############################################################################### +# Test 3: Iteration Count Impact on P99 Stability +############################################################################### +echo "=== Test 3: P99 Stability vs Iteration Count ===" + +for ITER in 50 100 500 1000 5000; do + echo "Running with iterations=$ITER..." + $GATOR bench \ + --filename test/gator/bench/basic/ \ + --iterations $ITER \ + --output json > "$OUTPUT_DIR/test3_iter_${ITER}.json" +done + +echo "Results saved to test3_iter_*.json" +echo "" + +############################################################################### +# Test 4: Memory Profiling Comparison +############################################################################### +echo "=== Test 4: Memory Profiling ===" + +echo "Running Rego with memory profiling..." +$GATOR bench \ + --filename test/gator/bench/both/ \ + --engine rego \ + --iterations $ITERATIONS \ + --memory \ + --output json > "$OUTPUT_DIR/test4_rego_memory.json" + +echo "Running CEL with memory profiling..." +$GATOR bench \ + --filename test/gator/bench/both/ \ + --engine cel \ + --iterations $ITERATIONS \ + --memory \ + --output json > "$OUTPUT_DIR/test4_cel_memory.json" + +echo "Results saved to test4_*_memory.json" +echo "" + +############################################################################### +# Test 5: Warmup Impact +############################################################################### +echo "=== Test 5: Warmup Impact ===" + +for WARMUP in 0 5 10 50 100; do + echo "Running with warmup=$WARMUP..." 
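+ # Only --warmup varies here; each run still measures 500 iterations after the warmup phase.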
+ $GATOR bench \ + --filename test/gator/bench/basic/ \ + --iterations 500 \ + --warmup $WARMUP \ + --output json > "$OUTPUT_DIR/test5_warmup_${WARMUP}.json" +done + +echo "Results saved to test5_warmup_*.json" +echo "" + +############################################################################### +# Test 6: Multiple Runs for Variance Analysis +############################################################################### +echo "=== Test 6: Variance Analysis (5 runs) ===" + +for RUN in 1 2 3 4 5; do + echo "Run $RUN/5..." + $GATOR bench \ + --filename test/gator/bench/basic/ \ + --iterations $ITERATIONS \ + --output json > "$OUTPUT_DIR/test6_run_${RUN}.json" +done + +echo "Results saved to test6_run_*.json" +echo "" + +############################################################################### +# Summary +############################################################################### +echo "=== Data Collection Complete ===" +echo "" +echo "All data saved to: $OUTPUT_DIR" +echo "" +echo "To analyze, run: ./test/gator/bench/scripts/analyze-data.sh" + diff --git a/website/docs/gator.md b/website/docs/gator.md index 9f9946556aa..80ef6106ab4 100644 --- a/website/docs/gator.md +++ b/website/docs/gator.md @@ -629,6 +629,371 @@ templatename3: +## The `gator bench` subcommand + +`gator bench` measures the performance of Gatekeeper policy evaluation. It loads ConstraintTemplates, Constraints, and Kubernetes resources, then repeatedly evaluates the resources against the constraints to gather latency and throughput metrics. + +:::note +`gator bench` measures **compute-only** policy evaluation latency, which does not include network round-trip time, TLS overhead, or Kubernetes API server processing. Real-world webhook latency will be higher. Use these metrics for relative comparisons between policy versions, not as absolute production latency predictions. +::: + +This command is useful for: +- **Policy developers**: Testing policy performance before deployment +- **Platform teams**: Comparing Rego vs CEL engine performance +- **CI/CD pipelines**: Detecting performance regressions between releases + +### Usage + +```shell +gator bench --filename=policies/ +``` + +#### Flags + +| Flag | Short | Default | Description | +|------|-------|---------|-------------| +| `--filename` | `-f` | | File or directory containing ConstraintTemplates, Constraints, and resources. Repeatable. | +| `--image` | `-i` | | OCI image URL containing policies. Repeatable. | +| `--engine` | `-e` | `cel` | Policy engine to benchmark: `rego`, `cel`, or `all` | +| `--iterations` | `-n` | `1000` | Number of benchmark iterations. Use ≥1000 for reliable P99 percentiles. | +| `--warmup` | | `10` | Warmup iterations before measurement | +| `--concurrency` | `-c` | `1` | Number of concurrent goroutines for parallel evaluation | +| `--output` | `-o` | `table` | Output format: `table`, `json`, or `yaml` | +| `--memory` | | `false` | Enable memory profiling (estimates only, not GC-cycle accurate) | +| `--save` | | | Save results to file for future comparison | +| `--compare` | | | Compare against a baseline file | +| `--threshold` | | `10` | Regression threshold percentage (for CI/CD) | +| `--min-threshold` | | `0` | Minimum absolute latency difference to consider (e.g., `100µs`). Useful for fast policies where percentage changes may be noise. 
| +| `--stats` | | `false` | Gather detailed statistics from constraint framework | + +### Examples + +#### Basic Benchmark + +```shell +gator bench --filename=policies/ +``` + +Output: +``` +=== Benchmark Results: Rego Engine === + +Configuration: + Templates: 5 + Constraints: 10 + Objects: 50 + Iterations: 1000 + Total Reviews: 50000 + +Timing: + Setup Duration: 25.00ms + └─ Client Creation: 0.05ms + └─ Template Compilation: 20.00ms + └─ Constraint Loading: 3.00ms + └─ Data Loading: 1.95ms + Total Duration: 25.00s + Throughput: 2000.00 reviews/sec + +Latency (per review): + Min: 200.00µs + Max: 5.00ms + Mean: 500.00µs + P50: 450.00µs + P95: 1.20ms + P99: 2.50ms + +Results: + Violations Found: 1500 +``` + +#### Concurrent Benchmarking + +Simulate parallel load to test contention behavior: + +```shell +gator bench --filename=policies/ --concurrency=4 +``` + +This runs 4 parallel goroutines each executing reviews concurrently. + +``` +=== Benchmark Results: Rego Engine === + +Configuration: + Templates: 5 + Constraints: 10 + Objects: 50 + Iterations: 1000 + Concurrency: 4 + Total Reviews: 50000 +... +``` + +#### Compare Rego vs CEL Engines + +```shell +gator bench --filename=policies/ --engine=all +``` + +This runs benchmarks for both engines and displays a comparison table: + +``` +=== Engine Comparison === + +Metric Rego CEL +------ ------ ------ +Templates 5 5 +Constraints 10 10 +Setup Time 25.00ms 15.00ms +Throughput 2000/sec 3500/sec +Mean Latency 500.00µs 285.00µs +P95 Latency 1.20ms 600.00µs +P99 Latency 2.50ms 900.00µs +Violations 150 150 + +Performance: CEL is 1.75x faster than Rego +``` + +:::note +Templates without CEL code will be skipped when benchmarking the CEL engine. +A warning will be displayed indicating which templates were skipped. +::: + +:::caution +The CEL engine does not support referential constraints. Referential data loading +is skipped entirely when benchmarking with CEL—this is expected behavior, not an error. +If you have policies that rely on referential data (e.g., checking if a namespace exists), +those constraints will not be fully exercised during CEL benchmarks. An informational note +will be displayed indicating that referential data is not supported by the CEL engine. +::: + +#### Memory Profiling + +```shell +gator bench --filename=policies/ --memory +``` + +Adds memory statistics to the output: + +``` +Memory (estimated): + Allocs/Review: 3000 + Bytes/Review: 150.00 KB + Total Allocs: 15000000 + Total Bytes: 732.42 MB +``` + +:::caution +Memory statistics are estimates based on `runtime.MemStats` captured before and after benchmark runs. They do not account for garbage collection cycles that may occur during benchmarking. For production memory analysis, use Go's pprof profiler. 
+::: + +#### Save and Compare Baselines + +Save benchmark results as a baseline: + +```shell +gator bench --filename=policies/ --memory --save=baseline.json +``` + +Compare future runs against the baseline: + +```shell +gator bench --filename=policies/ --memory --compare=baseline.json +``` + +Output includes a comparison table: + +``` +=== Baseline Comparison: Rego Engine === + +Metric Baseline Current Delta Status +------ -------- ------- ----- ------ +P50 Latency 450.00µs 460.00µs +2.2% ✓ +P95 Latency 1.20ms 1.25ms +4.2% ✓ +P99 Latency 2.50ms 2.60ms +4.0% ✓ +Mean Latency 500.00µs 510.00µs +2.0% ✓ +Throughput 2000/sec 1960/sec -2.0% ✓ +Allocs/Review 3000 3050 +1.7% ✓ +Bytes/Review 150.00 KB 152.00 KB +1.3% ✓ + +✓ No significant regressions (threshold: 10.0%) +``` + +For fast policies (< 1ms), small percentage changes may be noise. Use `--min-threshold` to set an absolute minimum difference: + +```shell +gator bench --filename=policies/ --compare=baseline.json --threshold=10 --min-threshold=100µs +``` + +This marks a metric as passing if either: +- The percentage change is within the threshold (10%), OR +- The absolute difference is less than the min-threshold (100µs) + +### CI/CD Integration + +Use `gator bench` in CI/CD pipelines to detect performance regressions automatically. + +#### GitHub Actions Example + +```yaml +name: Policy Benchmark + +on: + pull_request: + paths: + - 'policies/**' + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Download baseline + uses: actions/download-artifact@v4 + with: + name: benchmark-baseline + path: . + continue-on-error: true # First run won't have baseline + + - name: Install gator + run: | + go install github.com/open-policy-agent/gatekeeper/v3/cmd/gator@latest + + - name: Run benchmark + run: | + if [ -f baseline.json ]; then + # Use min-threshold to avoid flaky failures on fast policies + gator bench -f policies/ --memory \ + --compare=baseline.json \ + --threshold=10 \ + --min-threshold=100µs + else + gator bench -f policies/ --memory --save=baseline.json + fi + + - name: Upload baseline + if: github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: benchmark-baseline + path: baseline.json +``` + +:::tip +Use `--min-threshold` in CI to prevent flaky failures. For policies that evaluate in under 1ms, a 10% regression might only be 50µs of noise from system jitter. +::: + +#### Exit Codes + +| Exit Code | Meaning | +|-----------|---------| +| `0` | Benchmark completed successfully, no regressions detected | +| `1` | Error occurred, or regression threshold exceeded (when using `--compare`) | + +When `--compare` is used with `--threshold`, the command exits with code `1` if any metric regresses beyond the threshold. This enables CI/CD pipelines to fail builds that introduce performance regressions. + +### Understanding Metrics + +| Metric | Description | +|--------|-------------| +| **P50/P95/P99 Latency** | Percentile latencies per review. P99 of 2ms means 99% of reviews complete in ≤2ms. Use ≥1000 iterations for reliable P99. | +| **Mean Latency** | Average time per review | +| **Throughput** | Reviews processed per second | +| **Allocs/Review** | Memory allocations per review (with `--memory`). Estimate only. | +| **Bytes/Review** | Bytes allocated per review (with `--memory`). Estimate only. 
| +| **Setup Duration** | Time to load templates, constraints, and data | + +#### Setup Duration Breakdown + +Setup duration includes: +- **Client Creation**: Initializing the constraint client +- **Template Compilation**: Compiling Rego/CEL code in ConstraintTemplates +- **Constraint Loading**: Adding constraints to the client +- **Data Loading**: Loading all Kubernetes resources into the data cache + +:::note +Data loading adds all provided resources to the constraint client's cache. This is intentional behavior that matches how Gatekeeper evaluates referential constraints—policies that reference other cluster resources (e.g., checking if a namespace exists) need this cached data available during evaluation. +::: + +#### Performance Guidance + +- **P99 latency < 100ms** is recommended for production admission webhooks +- **CEL is typically faster than Rego** for equivalent policies +- **High memory allocations** may indicate inefficient policy patterns +- **Setup time** matters for cold starts; consider template compilation cost +- **Concurrency testing** (`--concurrency=N`) reveals contention issues not visible in sequential runs + +### Performance Characteristics + +The following characteristics are based on architectural differences between policy engines and general benchmarking principles. Actual numbers will vary based on policy complexity, hardware, and workload. + +:::tip +These insights were generated using the data gathering scripts in the Gatekeeper repository: +- [`test/gator/bench/scripts/gather-data.sh`](https://github.com/open-policy-agent/gatekeeper/blob/master/test/gator/bench/scripts/gather-data.sh) - Collects benchmark data across different scenarios +- [`test/gator/bench/scripts/analyze-data.sh`](https://github.com/open-policy-agent/gatekeeper/blob/master/test/gator/bench/scripts/analyze-data.sh) - Analyzes and summarizes the collected data + +You can run these scripts locally to validate these characteristics on your own hardware. +::: + +#### CEL vs Rego + +| Characteristic | CEL | Rego | +|----------------|-----|------| +| **Evaluation Speed** | 1.5-3x faster | Baseline | +| **Memory per Review** | 20-30% less | Baseline | +| **Setup/Compilation** | 2-3x slower | Faster | +| **Best For** | Long-running processes | Cold starts | + +**Why the difference?** +- CEL compiles to more efficient bytecode, resulting in faster evaluation +- Rego has lighter upfront compilation cost but slower per-evaluation overhead +- For admission webhooks (long-running), CEL's evaluation speed advantage compounds over time + +#### Concurrency Scaling + +:::note +The `--concurrency` flag simulates parallel policy evaluation similar to how Kubernetes admission webhooks handle concurrent requests. In production, Gatekeeper processes multiple admission requests simultaneously, making concurrent benchmarking essential for realistic performance testing. 
+::: + +- **Linear scaling** up to 4-8 concurrent workers +- **Diminishing returns** beyond CPU core count +- **Increased P99 variance** at high concurrency due to contention +- **Recommendation**: Use 4-8 workers for load testing; match production replica count + +``` +Concurrency Typical Efficiency +1 100% (baseline) +2 85-95% +4 70-85% +8 50-70% +16+ <50% (diminishing returns) +``` + +#### Benchmarking Best Practices + +| Practice | Recommendation | Why | +|----------|----------------|-----| +| **Iterations** | ≥1000 | Required for statistically meaningful P99 percentiles | +| **Warmup** | 10 iterations | Go runtime stabilizes quickly; more warmup has minimal impact | +| **Multiple Runs** | 3-5 runs | Expect 2-8% variance between identical runs | +| **P99 vs Mean** | Focus on P99 for SLAs | P99 has higher variance (~8%) than mean (~2%) | +| **CI Thresholds** | Use `--min-threshold` | Prevents flaky failures from natural variance | + +#### Interpreting Results + +**Healthy patterns:** +- P95/P99 within 2-5x of P50 (consistent performance) +- Memory allocations stable across runs +- Throughput scales with concurrency up to core count + +**Warning signs:** +- P99 > 10x P50 (high tail latency, possible GC pressure) +- Memory growing with iteration count (potential leak) +- Throughput decreasing at low concurrency (contention issue) +- Large variance between runs (noisy environment or unstable policy) + + ## Bundling Policy into OCI Artifacts It may be useful to bundle policy files into OCI Artifacts for ingestion during