diff --git a/.github/workflows/performance-nightly.yml b/.github/workflows/performance-nightly.yml
new file mode 100644
index 000000000..12fe1c464
--- /dev/null
+++ b/.github/workflows/performance-nightly.yml
@@ -0,0 +1,140 @@
+name: Nightly Performance Baseline
+
+on:
+  schedule:
+    # Run at 3:00 AM UTC daily
+    - cron: "0 3 * * *"
+  workflow_dispatch:  # Allow manual triggering
+
+permissions:
+  contents: write  # Required for the `git push` of refreshed baselines
+  issues: write    # Required for the "Create issue on failure" step
+
+jobs:
+  update-baseline:
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          fetch-depth: 0
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: "1.24"
+
+      - name: Set up Rust
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          toolchain: "1.90"  # quoted: a bare 1.90 is a YAML float and would be passed on as 1.9
+
+      - name: Cache Rust dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+            candle-binding/target/
+          key: ${{ runner.os }}-nightly-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-nightly-cargo-
+
+      - name: Cache Go dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-nightly-go-${{ hashFiles('**/go.sum') }}
+          restore-keys: |
+            ${{ runner.os }}-nightly-go-
+
+      - name: Cache Models
+        uses: actions/cache@v4
+        with:
+          path: |
+            models/
+          key: ${{ runner.os }}-models-v1-${{ hashFiles('tools/make/models.mk') }}
+          restore-keys: |
+            ${{ runner.os }}-models-v1-
+
+      - name: Build Rust library (CPU-only)
+        run: make rust-ci
+
+      - name: Install HuggingFace CLI
+        run: |
+          pip install -U "huggingface_hub[cli]" hf_transfer
+
+      - name: Download models (full set for nightly)
+        env:
+          CI_MINIMAL_MODELS: false
+          HF_HUB_ENABLE_HF_TRANSFER: 1
+          HF_HUB_DISABLE_TELEMETRY: 1
+        run: make download-models
+
+      - name: Run comprehensive benchmarks
+        run: |
+          set -o pipefail && mkdir -p reports  # tee must not mask a failing go test; no earlier step here creates reports/
+          export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release
+          cd perf && go test -bench=. -benchmem -benchtime=30s ./benchmarks/... | tee ../reports/nightly-bench.txt
+
+      - name: Update baselines
+        run: |
+          make perf-baseline-update
+
+      - name: Check for baseline changes
+        id: check_changes
+        run: |
+          git add perf/testdata/baselines/
+          if git diff --cached --quiet; then
+            echo "changes=false" >> $GITHUB_OUTPUT
+            echo "No baseline changes detected"
+          else
+            echo "changes=true" >> $GITHUB_OUTPUT
+            echo "Baseline changes detected"
+          fi
+
+      - name: Commit updated baselines
+        if: steps.check_changes.outputs.changes == 'true'
+        run: |
+          git config user.name "GitHub Actions Bot"
+          git config user.email "actions@github.com"
+          git commit -m "chore: update performance baselines (nightly run)"
+          git push
+
+      - name: Upload nightly results
+        uses: actions/upload-artifact@v4
+        with:
+          name: nightly-baseline-${{ github.run_number }}
+          path: |
+            reports/
+            perf/testdata/baselines/
+          retention-days: 90
+
+      - name: Create issue on failure
+        if: failure()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const title = '🔥 Nightly Performance Baseline Update Failed';
+            const body = `
+            The nightly performance baseline update failed.
+
+            **Run:** ${{ github.run_id }}
+            **Time:** ${new Date().toISOString()}
+
+            Please investigate the failure in the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+            `;
+
+            await github.rest.issues.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              title: title,
+              body: body,
+              labels: ['performance', 'ci-failure']
+            });
diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml
new file mode 100644
index 000000000..166a2faa8
--- /dev/null
+++ b/.github/workflows/performance-test.yml
@@ -0,0 +1,197 @@
+name: Performance Tests
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'src/semantic-router/**'
+      - 'candle-binding/**'
+      - 'perf/**'
+      - '.github/workflows/performance-test.yml'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pull-requests: write  # Required to comment on PRs
+  issues: write         # Required to comment on PRs (PRs are issues)
+
+jobs:
+  component-benchmarks:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Need full history for baseline comparison
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: "1.24"
+
+      - name: Set up Rust
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          toolchain: "1.90"  # quoted: a bare 1.90 is a YAML float and would be passed on as 1.9
+
+      - name: Cache Rust dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+            candle-binding/target/
+          key: ${{ runner.os }}-perf-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-perf-cargo-
+
+      - name: Cache Go dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-perf-go-${{ hashFiles('**/go.sum') }}
+          restore-keys: |
+            ${{ runner.os }}-perf-go-
+
+      - name: Cache Models
+        uses: actions/cache@v4
+        with:
+          path: |
+            models/
+          key: ${{ runner.os }}-models-v1-${{ hashFiles('tools/make/models.mk') }}
+          restore-keys: |
+            ${{ runner.os }}-models-v1-
+        continue-on-error: true
+
+      - name: Build Rust library (CPU-only)
+        run: make rust-ci
+
+      - name: Install HuggingFace CLI
+        run: |
+          pip install -U "huggingface_hub[cli]" hf_transfer
+
+      - name: Download models (minimal)
+        env:
+          CI_MINIMAL_MODELS: true
+          HF_HUB_ENABLE_HF_TRANSFER: 1
+          HF_HUB_DISABLE_TELEMETRY: 1
+        run: make download-models
+
+      - name: Download performance baselines
+        continue-on-error: true
+        run: |
+          mkdir -p perf/testdata/baselines  # PR checkouts have no local "main" branch, so read baselines from origin/main
+          git show origin/main:perf/testdata/baselines/classification.json > perf/testdata/baselines/classification.json 2>/dev/null || echo '{"version":"v1.0.0","benchmarks":{}}' > perf/testdata/baselines/classification.json
+          git show origin/main:perf/testdata/baselines/decision.json > perf/testdata/baselines/decision.json 2>/dev/null || echo '{"version":"v1.0.0","benchmarks":{}}' > perf/testdata/baselines/decision.json
+          git show origin/main:perf/testdata/baselines/cache.json > perf/testdata/baselines/cache.json 2>/dev/null || echo '{"version":"v1.0.0","benchmarks":{}}' > perf/testdata/baselines/cache.json
+
+      - name: Run component benchmarks
+        run: |
+          set -o pipefail  # otherwise tee's exit status hides a failing benchmark run
+          mkdir -p reports
+          LD_LIBRARY_PATH=${PWD}/candle-binding/target/release make perf-bench-quick 2>&1 | tee reports/bench-output.txt
+
+      - name: Parse benchmark results
+        id: parse
+        continue-on-error: true
+        run: |
+          # Extract benchmark results
+          # This is a simplified parser - a real implementation would be more robust
+          echo "benchmarks_completed=true" >> $GITHUB_OUTPUT
+
+      - name: Generate performance summary
+        id: summary
+        run: |
+          cat > reports/summary.md <<'EOF'
+          ## Performance Benchmark Results
+
+          Component benchmarks completed successfully.
+
+          ### Summary
+          - Classification benchmarks: ✅
+          - Decision engine benchmarks: ✅
+          - Cache benchmarks: ✅
+
+          ### Details
+          See attached benchmark artifacts for detailed results and profiles.
+
+          ---
+          _Performance testing powered by [vLLM Semantic Router](https://github.com/vllm-project/semantic-router)_
+          EOF
+
+      - name: Comment PR with results
+        if: github.event_name == 'pull_request'
+        continue-on-error: true  # May fail for PRs from forks due to GitHub security restrictions
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            let summary = '## Performance Benchmark Results\n\n';
+
+            try {
+              summary = fs.readFileSync('reports/summary.md', 'utf8');
+            } catch (err) {
+              summary += '✅ Component benchmarks completed\n\n';
+              summary += '_Detailed results available in workflow artifacts_\n';
+            }
+
+            // Find existing comment
+            const {data: comments} = await github.rest.issues.listComments({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+            });
+
+            const botComment = comments.find(comment =>
+              comment.user.type === 'Bot' &&
+              comment.body.includes('Performance Benchmark Results')
+            );
+
+            if (botComment) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: botComment.id,
+                body: summary
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: summary
+              });
+            }
+
+      - name: Upload performance artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: performance-results-${{ github.run_number }}
+          path: |
+            reports/
+          retention-days: 30
+
+      - name: Check for regressions (placeholder)
+        id: regression_check
+        continue-on-error: true
+        run: |
+          # In a real implementation, this would:
+          # 1. Parse benchmark output
+          # 2. Compare against baselines
+          # 3. Calculate % changes
+          # 4. Exit 1 if regressions exceed thresholds
+          echo "No regressions detected (placeholder check)"
+
+      - name: Fail on regression
+        if: steps.regression_check.outcome == 'failure'
+        run: |
+          echo "❌ Performance regressions detected!"
+          echo "See benchmark results in artifacts for details"
+          exit 1
diff --git a/Makefile b/Makefile
index b53ca7e76..75b14be4c 100644
--- a/Makefile
+++ b/Makefile
@@ -21,6 +21,7 @@ _run:
 		-f tools/make/observability.mk \
 		-f tools/make/openshift.mk \
 		-f tools/make/e2e.mk \
+		-f tools/make/performance.mk \
 		$(MAKECMDGOALS)
 
 .PHONY: _run
diff --git a/e2e/pkg/performance/load_generator.go b/e2e/pkg/performance/load_generator.go
new file mode 100644
index 000000000..19580e249
--- /dev/null
+++ b/e2e/pkg/performance/load_generator.go
@@ -0,0 +1,268 @@
+package performance
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"sort"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+// LoadGenerator issues requests from a bounded set of goroutines, optionally rate-limited, for a fixed duration.
+type LoadGenerator struct {
+	concurrency int           // capacity of the semaphore that bounds in-flight requests
+	rateLimit   int           // requests per second (0 = unlimited)
+	duration    time.Duration // timeout applied to the dispatch loop in GenerateLoad
+}
+
+// NewLoadGenerator returns a LoadGenerator with the given concurrency, rate limit (0 = unlimited) and duration.
+func NewLoadGenerator(concurrency, rateLimit int, duration time.Duration) *LoadGenerator {
+	lg := new(LoadGenerator)
+	lg.concurrency = concurrency
+	lg.rateLimit = rateLimit
+	lg.duration = duration
+	return lg
+}
+
+// LoadResult contains the counters, latency statistics and raw samples produced by one GenerateLoad run.
+type LoadResult struct {
+	TotalRequests  int             // requests dispatched by the load loop
+	SuccessfulReqs int             // requests whose RequestFunc returned nil
+	FailedReqs     int             // requests whose RequestFunc returned an error
+	Duration       time.Duration   // wall-clock time from start until every dispatched request returned
+	AvgLatencyMs   float64         // mean latency, milliseconds
+	P50LatencyMs   float64         // 50th percentile latency, milliseconds
+	P90LatencyMs   float64         // 90th percentile latency, milliseconds
+	P95LatencyMs   float64         // 95th percentile latency, milliseconds
+	P99LatencyMs   float64         // 99th percentile latency, milliseconds
+	MaxLatencyMs   float64         // slowest request, milliseconds
+	MinLatencyMs   float64         // fastest request, milliseconds
+	ThroughputQPS  float64         // TotalRequests divided by Duration in seconds
+	Latencies      []time.Duration // raw per-request latencies, appended as requests finish
+	Errors         []error         // errors returned by failed requests
+}
+
+// RequestFunc executes a single request; a non-nil error makes GenerateLoad count the request as failed.
+type RequestFunc func(ctx context.Context) error
+
+// GenerateLoad dispatches reqFunc from at most lg.concurrency goroutines until lg.duration elapses or ctx is done, then aggregates the results.
+func (lg *LoadGenerator) GenerateLoad(ctx context.Context, reqFunc RequestFunc) (*LoadResult, error) {
+	result := &LoadResult{
+		Latencies: make([]time.Duration, 0),
+		Errors:    make([]error, 0),
+	}
+
+	var mu sync.Mutex
+	var wg sync.WaitGroup
+	var successCount, failCount atomic.Int64
+
+	// Rate limiting setup
+	var ticker *time.Ticker
+	var tickerChan <-chan time.Time
+	if lg.rateLimit > 0 {
+		interval := time.Second / time.Duration(lg.rateLimit)
+		ticker = time.NewTicker(interval) // time.Ticker is the type; NewTicker is the constructor
+		tickerChan = ticker.C
+		defer ticker.Stop()
+	}
+
+	// Create timeout context
+	loadCtx, cancel := context.WithTimeout(ctx, lg.duration)
+	defer cancel()
+
+	// Create semaphore for concurrency control
+	semaphore := make(chan struct{}, lg.concurrency)
+
+	startTime := time.Now()
+	requestCount := 0
+
+	// Generate load loop
+loadLoop:
+	for {
+		select {
+		case <-loadCtx.Done():
+			break loadLoop
+		default:
+			// Rate limiting: wait for the next tick, but give up as soon as the run is over
+			if lg.rateLimit > 0 {
+				select {
+				case <-tickerChan:
+					// Continue
+				case <-loadCtx.Done():
+					break loadLoop
+				}
+			}
+
+			// Acquire semaphore
+			select {
+			case semaphore <- struct{}{}:
+				// Got slot
+			case <-loadCtx.Done():
+				break loadLoop
+			}
+
+			requestCount++
+			wg.Add(1)
+
+			go func() {
+				defer wg.Done()
+				defer func() { <-semaphore }() // Release semaphore
+
+				reqStart := time.Now()
+				err := reqFunc(ctx) // NOTE(review): parent ctx, not loadCtx, so in-flight requests are not cancelled at the deadline - confirm intended
+				latency := time.Since(reqStart)
+
+				mu.Lock()
+				result.Latencies = append(result.Latencies, latency)
+				if err != nil {
+					result.Errors = append(result.Errors, err)
+					failCount.Add(1)
+				} else {
+					successCount.Add(1)
+				}
+				mu.Unlock()
+			}()
+		}
+	}
+
+	// Wait for all requests to complete
+	wg.Wait()
+
+	result.Duration = time.Since(startTime)
+	result.TotalRequests = requestCount
+	result.SuccessfulReqs = int(successCount.Load())
+	result.FailedReqs = int(failCount.Load())
+
+	// Calculate statistics (skipped when nothing ran: the helper indexes the first and last sample)
+	if len(result.Latencies) > 0 {
+		calculateLatencyStats(result)
+	}
+
+	// Calculate throughput
+	if result.Duration > 0 {
+		result.ThroughputQPS = float64(result.TotalRequests) / result.Duration.Seconds()
+	}
+
+	return result, nil
+}
+
+// calculateLatencyStats fills the aggregate latency fields of result (in milliseconds) from result.Latencies.
+func calculateLatencyStats(result *LoadResult) {
+	total := 0.0
+	samplesMs := make([]float64, 0, len(result.Latencies))
+	for _, d := range result.Latencies {
+		v := float64(d.Microseconds()) / 1000.0 // microsecond resolution, expressed in ms
+		samplesMs = append(samplesMs, v)
+		total += v
+	}
+	// Min, max and the percentiles all index into the sorted samples.
+	sort.Float64s(samplesMs)
+
+	count := len(samplesMs)
+	result.MinLatencyMs = samplesMs[0]
+	result.MaxLatencyMs = samplesMs[count-1]
+	result.AvgLatencyMs = total / float64(count)
+	result.P50LatencyMs = percentile(samplesMs, 50)
+	result.P90LatencyMs = percentile(samplesMs, 90)
+	result.P95LatencyMs = percentile(samplesMs, 95)
+	result.P99LatencyMs = percentile(samplesMs, 99)
+}
+
+// percentile returns the nearest-rank p-th percentile of sortedData (ascending order); it returns 0 for empty input.
+func percentile(sortedData []float64, p int) float64 {
+	n := len(sortedData)
+	switch {
+	case n == 0:
+		return 0
+	case p >= 100:
+		return sortedData[n-1]
+	}
+	// Nearest rank: the 1-based rank ceil(n*p/100), converted to a 0-based index and clamped.
+	rank := int(math.Ceil(float64(n) * float64(p) / 100.0))
+	switch idx := rank - 1; {
+	case idx < 0:
+		return sortedData[0]
+	case idx >= n:
+		return sortedData[n-1]
+	default:
+		return sortedData[idx]
+	}
+}
+
+// PrintResults writes a human-readable summary to stdout, followed by at most the first five errors.
+func (r *LoadResult) PrintResults() {
+	total := float64(r.TotalRequests)
+	if total == 0 {
+		total = 1 // nothing was dispatched: avoid 0/0, which would print NaN%
+	}
+	fmt.Println("\n" + "===================================================================================")
+	fmt.Println("                              LOAD TEST RESULTS")
+	fmt.Println("===================================================================================")
+	fmt.Printf("Duration:          %v\n", r.Duration.Round(time.Millisecond))
+	fmt.Printf("Total Requests:    %d\n", r.TotalRequests)
+	fmt.Printf("Successful:        %d (%.2f%%)\n", r.SuccessfulReqs, float64(r.SuccessfulReqs)/total*100)
+	fmt.Printf("Failed:            %d (%.2f%%)\n", r.FailedReqs, float64(r.FailedReqs)/total*100)
+	fmt.Printf("Throughput:        %.2f req/s\n", r.ThroughputQPS)
+	fmt.Println("-----------------------------------------------------------------------------------")
+	fmt.Println("Latency Statistics (ms):")
+	fmt.Printf("  Min:     %8.2f\n", r.MinLatencyMs)
+	fmt.Printf("  Average: %8.2f\n", r.AvgLatencyMs)
+	fmt.Printf("  P50:     %8.2f\n", r.P50LatencyMs)
+	fmt.Printf("  P90:     %8.2f\n", r.P90LatencyMs)
+	fmt.Printf("  P95:     %8.2f\n", r.P95LatencyMs)
+	fmt.Printf("  P99:     %8.2f\n", r.P99LatencyMs)
+	fmt.Printf("  Max:     %8.2f\n", r.MaxLatencyMs)
+	fmt.Println("===================================================================================")
+	if len(r.Errors) > 0 {
+		fmt.Printf("\nFirst 5 errors:\n")
+		for i := 0; i < len(r.Errors) && i < 5; i++ {
+			fmt.Printf("  %d. %v\n", i+1, r.Errors[i])
+		}
+	}
+}
+
+// RampUpLoadGenerator runs a sequence of fixed-rate load steps whose rate moves from startQPS towards endQPS.
+type RampUpLoadGenerator struct {
+	startQPS int           // rate of the first step
+	endQPS   int           // rate the ramp approaches; the last step is one increment short of it
+	duration time.Duration // total duration, divided evenly between the steps
+	steps    int           // number of load steps
+}
+
+// NewRampUpLoadGenerator returns a generator that ramps from startQPS towards endQPS over duration in the given number of steps.
+func NewRampUpLoadGenerator(startQPS, endQPS int, duration time.Duration, steps int) *RampUpLoadGenerator {
+	rlg := new(RampUpLoadGenerator)
+	rlg.startQPS = startQPS
+	rlg.endQPS = endQPS
+	rlg.duration = duration
+	rlg.steps = steps
+	return rlg
+}
+
+// GenerateLoad runs rlg.steps consecutive load steps and returns one LoadResult per completed step; it errors if steps is not positive.
+func (rlg *RampUpLoadGenerator) GenerateLoad(ctx context.Context, reqFunc RequestFunc) ([]*LoadResult, error) {
+	if rlg.steps <= 0 { // would otherwise panic: negative capacity in make, or integer division by zero
+		return nil, fmt.Errorf("ramp-up requires at least one step, got %d", rlg.steps)
+	}
+
+	results := make([]*LoadResult, 0, rlg.steps)
+	stepDuration := rlg.duration / time.Duration(rlg.steps)
+	qpsIncrement := float64(rlg.endQPS-rlg.startQPS) / float64(rlg.steps)
+
+	for i := 0; i < rlg.steps; i++ {
+		currentQPS := rlg.startQPS + int(float64(i)*qpsIncrement)
+		fmt.Printf("\nRamp-up step %d/%d: QPS=%d for %v\n", i+1, rlg.steps, currentQPS, stepDuration)
+		lg := NewLoadGenerator(currentQPS, currentQPS, stepDuration) // the step's QPS doubles as its concurrency cap
+		result, err := lg.GenerateLoad(ctx, reqFunc)
+		if err != nil {
+			return results, fmt.Errorf("load generation failed at step %d: %w", i+1, err)
+		}
+		results = append(results, result)
+		result.PrintResults()
+		time.Sleep(time.Second) // brief pause between steps
+	}
+
+	return results, nil
+}
diff --git a/e2e/pkg/performance/metrics_collector.go b/e2e/pkg/performance/metrics_collector.go
new file mode 100644
index 000000000..7695c61a1
--- /dev/null
+++ b/e2e/pkg/performance/metrics_collector.go
@@ -0,0 +1,180 @@
+package performance
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	metricsv "k8s.io/metrics/pkg/client/clientset/versioned"
+)
+
+// MetricsCollector collects CPU and memory usage for the pods of one Kubernetes namespace.
+type MetricsCollector struct {
+	kubeClient    *kubernetes.Clientset // lists pods for label-based collection
+	metricsClient *metricsv.Clientset   // reads pod metrics; when nil, CollectPodMetrics returns an error
+	namespace     string                // namespace used for every query
+}
+
+// NewMetricsCollector returns a MetricsCollector bound to the given clients and namespace.
+func NewMetricsCollector(kubeClient *kubernetes.Clientset, metricsClient *metricsv.Clientset, namespace string) *MetricsCollector {
+	mc := new(MetricsCollector)
+	mc.kubeClient = kubeClient
+	mc.metricsClient = metricsClient
+	mc.namespace = namespace
+	return mc
+}
+
+// PodMetrics holds one usage sample for a single pod, summed over its containers.
+type PodMetrics struct {
+	PodName        string    // name of the pod that was queried
+	Timestamp      time.Time // timestamp carried by the metrics API object
+	CPUUsageCores  float64   // CPU usage summed over containers, in cores
+	MemoryUsageMB  float64   // memory usage summed over containers, in units of 1024*1024 bytes
+	ContainerCount int       // number of containers in the sample
+}
+
+// CollectPodMetrics fetches the current metrics object for podName and sums CPU and memory across its containers.
+func (mc *MetricsCollector) CollectPodMetrics(ctx context.Context, podName string) (*PodMetrics, error) {
+	if mc.metricsClient == nil {
+		return nil, fmt.Errorf("metrics client not available")
+	}
+
+	podAPI := mc.metricsClient.MetricsV1beta1().PodMetricses(mc.namespace)
+	sample, err := podAPI.Get(ctx, podName, metav1.GetOptions{})
+	if err != nil {
+		return nil, fmt.Errorf("failed to get pod metrics: %w", err)
+	}
+
+	// Sum usage over every container of the pod: CPU as MilliValue/1000,
+	// memory as Value divided by 1024*1024.
+	const bytesPerMB = 1024 * 1024
+	var cpuCores, memMB float64
+	for _, c := range sample.Containers {
+		cpuCores += float64(c.Usage.Cpu().MilliValue()) / 1000.0
+		memMB += float64(c.Usage.Memory().Value()) / bytesPerMB
+	}
+
+	return &PodMetrics{
+		PodName:        podName,
+		Timestamp:      sample.Timestamp.Time,
+		CPUUsageCores:  cpuCores,
+		MemoryUsageMB:  memMB,
+		ContainerCount: len(sample.Containers),
+	}, nil
+}
+
+// CollectPodMetricsByLabel collects metrics for every pod matching labelSelector; pods whose metrics cannot be read are skipped with a warning.
+func (mc *MetricsCollector) CollectPodMetricsByLabel(ctx context.Context, labelSelector string) ([]*PodMetrics, error) {
+	if mc.kubeClient == nil { // return an error like the metricsClient guard in CollectPodMetrics, rather than panic
+		return nil, fmt.Errorf("kube client not available")
+	}
+
+	pods, err := mc.kubeClient.CoreV1().Pods(mc.namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
+	if err != nil {
+		return nil, fmt.Errorf("failed to list pods: %w", err)
+	}
+
+	var allMetrics []*PodMetrics
+	for _, pod := range pods.Items {
+		metrics, err := mc.CollectPodMetrics(ctx, pod.Name)
+		if err != nil { // best effort: warn and carry on with the remaining pods
+			fmt.Printf("Warning: failed to collect metrics for pod %s: %v\n", pod.Name, err)
+			continue
+		}
+		allMetrics = append(allMetrics, metrics)
+	}
+	return allMetrics, nil
+}
+
+// MonitorPodMetrics samples podName every interval and sends each sample on results until ctx is done.
+func (mc *MetricsCollector) MonitorPodMetrics(ctx context.Context, podName string, interval time.Duration, results chan<- *PodMetrics) {
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+	for ctx.Err() == nil { // every path out of the select re-checks cancellation here
+		select {
+		case <-ctx.Done():
+		case <-ticker.C:
+			if metrics, err := mc.CollectPodMetrics(ctx, podName); err != nil {
+				fmt.Printf("Warning: failed to collect metrics: %v\n", err)
+			} else {
+				select { // a bare send would block forever once the consumer stops reading
+				case results <- metrics:
+				case <-ctx.Done():
+				}
+			}
+		}
+	}
+}
+
+// ResourceStats holds min/avg/max CPU and memory figures aggregated over a set of PodMetrics samples.
+type ResourceStats struct {
+	AvgCPUCores float64 // mean of CPUUsageCores over the samples
+	MaxCPUCores float64 // largest CPUUsageCores seen
+	MinCPUCores float64 // smallest CPUUsageCores seen
+	AvgMemoryMB float64 // mean of MemoryUsageMB over the samples
+	MaxMemoryMB float64 // largest MemoryUsageMB seen
+	MinMemoryMB float64 // smallest MemoryUsageMB seen
+	SampleCount int     // number of samples aggregated
+}
+
+// AggregateMetrics reduces a slice of samples to min/avg/max CPU and memory; an empty slice yields a zero ResourceStats.
+func AggregateMetrics(metrics []*PodMetrics) *ResourceStats {
+	n := len(metrics)
+	if n == 0 {
+		return &ResourceStats{}
+	}
+
+	// Seed the extremes from the first sample, then fold every sample in.
+	var cpuSum, memSum float64
+	minCPU, maxCPU := metrics[0].CPUUsageCores, metrics[0].CPUUsageCores
+	minMem, maxMem := metrics[0].MemoryUsageMB, metrics[0].MemoryUsageMB
+	for _, sample := range metrics {
+		cpu, mem := sample.CPUUsageCores, sample.MemoryUsageMB
+		cpuSum += cpu
+		memSum += mem
+
+		switch {
+		case cpu < minCPU:
+			minCPU = cpu
+		case cpu > maxCPU:
+			maxCPU = cpu
+		}
+		switch {
+		case mem < minMem:
+			minMem = mem
+		case mem > maxMem:
+			maxMem = mem
+		}
+	}
+
+	return &ResourceStats{
+		AvgCPUCores: cpuSum / float64(n),
+		MaxCPUCores: maxCPU,
+		MinCPUCores: minCPU,
+		AvgMemoryMB: memSum / float64(n),
+		MaxMemoryMB: maxMem,
+		MinMemoryMB: minMem,
+		SampleCount: n,
+	}
+}
+
+// PrintResourceStats writes the sample count and the min/average/max CPU (cores) and memory (MB) figures to stdout.
+func (rs *ResourceStats) PrintResourceStats() {
+	fmt.Println("\n" + "===================================================================================")
+	fmt.Println("                           RESOURCE USAGE STATISTICS")
+	fmt.Println("===================================================================================")
+	fmt.Printf("Samples Collected: %d\n", rs.SampleCount)
+	fmt.Println("-----------------------------------------------------------------------------------")
+	fmt.Println("CPU Usage (cores):")
+	fmt.Printf("  Min:     %.3f\n", rs.MinCPUCores)
+	fmt.Printf("  Average: %.3f\n", rs.AvgCPUCores)
+	fmt.Printf("  Max:     %.3f\n", rs.MaxCPUCores)
+	fmt.Println("-----------------------------------------------------------------------------------")
+	fmt.Println("Memory Usage (MB):")
+	fmt.Printf("  Min:     %.2f\n", rs.MinMemoryMB)
+	fmt.Printf("  Average: %.2f\n", rs.AvgMemoryMB)
+	fmt.Printf("  Max:     %.2f\n", rs.MaxMemoryMB)
+	fmt.Println("===================================================================================")
+}
diff --git a/perf/CI-STRATEGY.md b/perf/CI-STRATEGY.md
new file mode 100644
index 000000000..32a60815b
--- /dev/null
+++ b/perf/CI-STRATEGY.md
@@ -0,0 +1,384 @@
+# Performance Testing CI Strategy
+
+## The Problem You Identified
+
+Running performance tests on **every PR** has significant costs:
+
+- 💸 **Cost:** Burns 15-20 CI minutes per PR
+- 🐌 **Speed:** Slows down developer workflow
+- 📊 **Noise:** CI variance causes false positives
+- 🔥 **Resources:** Downloads models, uses CPU intensively
+
+**You're right to question this!**
+
+---
+
+## Current Setup (After Optimization)
+
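+The workflow is intended to run **only when needed** (note: `.github/workflows/performance-test.yml` currently triggers on path changes and manual dispatch only, so the label gate described below still has to be wired into the workflow):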
+
+### ✅ Performance Tests Run When:
+
+1. **PR has `performance` label** ← Developer explicitly requests it
+2. **Manual trigger** ← Via GitHub Actions UI
+3. ~~Every PR~~ ← **REMOVED to save costs**
+
+### Usage:
+
+```bash
+# Developer workflow:
+1. Open PR with code changes
+2. Regular tests run (fast)
+3. If touching performance-critical code:
+   → Add "performance" label to PR
+   → Performance tests run automatically
+4. Review results in PR comment
+```
+
+---
+
+## Alternative Strategies
+
+Here are different approaches teams use, from most to least restrictive:
+
+### Strategy 1: Label-Based (CURRENT - RECOMMENDED) 🏷️
+
+**When it runs:**
+
+- Only when PR has `performance` label
+- Manual trigger via GitHub UI
+
+**Pros:**
+
+- ✅ Saves tons of CI time
+- ✅ Developers control when tests run
+- ✅ No noise on small PRs
+
+**Cons:**
+
+- ❌ Developers might forget to add label
+- ❌ Regressions could slip through
+
+**Best for:** Most teams, cost-conscious projects
+
+---
+
+### Strategy 2: Path-Based (Original Design) 📁
+
+**When it runs:**
+
+```yaml
+on:
+  pull_request:
+    paths:
+      - 'src/semantic-router/**'
+      - 'candle-binding/**'
+      - 'perf/**'
+```
+
+**Pros:**
+
+- ✅ Automatic - no manual intervention
+- ✅ Catches regressions early
+
+**Cons:**
+
+- ❌ Runs too often (most PRs touch these paths)
+- ❌ High CI cost
+- ❌ Slows down development
+
+**Best for:** Critical production systems, unlimited CI budget
+
+---
+
+### Strategy 3: Scheduled + Manual Only ⏰
+
+**When it runs:**
+
+```yaml
+on:
+  schedule:
+    - cron: "0 2 * * *"  # Daily at 2 AM
+  workflow_dispatch:      # Manual only
+```
+
+**Pros:**
+
+- ✅ Minimal CI cost
+- ✅ No PR delays
+- ✅ Nightly baseline still updates
+
+**Cons:**
+
+- ❌ Regressions found after merge (too late!)
+- ❌ Developers must manually trigger
+
+**Best for:** Early-stage projects, limited resources
+
+---
+
+### Strategy 4: Hybrid - Critical Paths Only 🎯
+
+**When it runs:**
+
+```yaml
+on:
+  pull_request:
+    paths:
+      - 'src/semantic-router/pkg/classification/**'  # Critical
+      - 'src/semantic-router/pkg/cache/**'           # Critical
+      - 'candle-binding/**'                          # Critical
+      # NOT: docs, tests, configs, etc.
+```
+
+**Pros:**
+
+- ✅ Automatic for critical code
+- ✅ Reduced CI usage vs path-based
+- ✅ Catches most important regressions
+
+**Cons:**
+
+- ❌ Still runs frequently
+- ❌ Can miss indirect performance impacts
+
+**Best for:** Mature projects with clear critical paths
+
+---
+
+### Strategy 5: PR Size Based 📏
+
+**When it runs:**
+
+```yaml
+# Run only on large PRs (>500 lines changed)
+if: github.event.pull_request.additions + github.event.pull_request.deletions > 500
+```
+
+**Pros:**
+
+- ✅ Small PRs skip expensive tests
+- ✅ Large risky changes get tested
+
+**Cons:**
+
+- ❌ Single-line change can cause regression
+- ❌ Complex logic to maintain
+
+**Best for:** Teams with predictable PR sizes
+
+---
+
+### Strategy 6: Pre-merge Only (Protected Branch) 🔒
+
+**When it runs:**
+
+```yaml
+on:
+  pull_request:
+    types: [ready_for_review]  # Only when marked ready
+  # OR
+  push:
+    branches: [main]  # Only after merge
+```
+
+**Pros:**
+
+- ✅ Tests final code before/after merge
+- ✅ Doesn't slow down draft PRs
+
+**Cons:**
+
+- ❌ Late feedback for developers
+- ❌ Might catch issues post-merge
+
+**Best for:** Fast-moving teams, trust-based workflows
+
+---
+
+## Recommended Setup by Project Stage
+
+### 🌱 Early Stage Project
+
+```yaml
+Strategy: Scheduled + Manual
+Performance Tests: Nightly only
+Reason: Save CI budget, iterate fast
+```
+
+### 🌿 Growing Project
+
+```yaml
+Strategy: Label-Based (CURRENT)
+Performance Tests: On 'performance' label
+Reason: Balance cost vs safety
+```
+
+### 🌳 Mature Project
+
+```yaml
+Strategy: Hybrid Critical Paths
+Performance Tests: Auto on critical code
+Reason: High confidence, catch regressions
+```
+
+### 🏢 Enterprise Project
+
+```yaml
+Strategy: Every PR (Path-Based)
+Performance Tests: Always
+Reason: Zero tolerance for regressions
+```
+
+---
+
+## How to Switch Strategies
+
+### Switch to "Every PR" (Path-Based)
+
+```yaml
+# .github/workflows/performance-test.yml
+on:
+  pull_request:
+    branches: [main]
+    paths:
+      - 'src/semantic-router/**'
+      - 'candle-binding/**'
+
+jobs:
+  component-benchmarks:
+    runs-on: ubuntu-latest
+    # Remove the check-should-run job
+    # Remove the needs/if conditions
+```
+
+### Switch to "Nightly Only"
+
+```yaml
+# .github/workflows/performance-test.yml
+on:
+  schedule:
+    - cron: "0 3 * * *"
+  workflow_dispatch:
+
+# Disable PR trigger completely
+```
+
+### Keep Current (Label-Based)
+
+No changes needed! Current setup is optimized.
+
+---
+
+## Cost Analysis
+
+Assuming:
+
+- 10 PRs per day
+- 20 minutes per performance test
+- $0.008 per minute (GitHub Actions pricing)
+
+| Strategy | PRs Tested | CI Minutes/Day | Cost/Month |
+|----------|------------|----------------|------------|
+| Every PR | 10 | 200 min | $48/month |
+| Label (25% use) | 2.5 | 50 min | $12/month |
+| Critical Paths | 5 | 100 min | $24/month |
+| Nightly Only | 0 | 0 min | $0/month |
+
+**Current Label-Based:** Saves ~$36/month vs Every PR! 💰
+
+---
+
+## Best Practices
+
+### For Developers
+
+**When to add `performance` label:**
+
+- ✅ Changing classification, cache, or decision engine
+- ✅ Modifying CGO bindings
+- ✅ Optimizing algorithms
+- ✅ Changing batch processing logic
+- ❌ Updating docs or tests
+- ❌ Fixing typos
+- ❌ Changing configs
+
+### For Reviewers
+
+**Check for performance label:**
+
+```markdown
+## Performance Checklist
+- [ ] Does this PR touch classification/cache/decision code?
+- [ ] Could this impact request latency?
+- [ ] Should we add 'performance' label and run tests?
+```
+
+### For CI
+
+**Monitor false negatives:**
+
+- Track regressions found in nightly but missed in PRs
+- If >5% slip through, consider tightening strategy
+
+---
+
+## FAQ
+
+### Q: What if a regression slips through?
+
+**A:** Nightly workflow will catch it and create an issue. You can:
+
+1. Revert the problematic PR
+2. Fix forward with a new PR
+3. Update baseline if intentional
+
+### Q: Can I force performance tests on a PR without label?
+
+**A:** Yes! Two ways:
+
+1. Add `performance` label to PR
+2. Go to Actions tab → Performance Tests → Run workflow → Select your branch
+
+### Q: What about main branch protection?
+
+**A:** Performance tests are NOT required checks. They're:
+
+- Advisory (warn but don't block)
+- Opt-in (run when needed)
+- Nightly will catch issues anyway
+
+### Q: Should I run tests locally before PR?
+
+**A:** Recommended for performance-critical changes:
+
+```bash
+make perf-bench-quick    # Takes 3-5 min
+make perf-compare        # Compare vs baseline
+```
+
+---
+
+## Summary
+
+**Current Strategy: Label-Based ✅**
+
+- Runs when PR has `performance` label
+- Saves ~75% CI costs vs "every PR"
+- Balances cost vs catching regressions
+- Nightly workflow ensures baselines stay current
+
+**To run performance tests on your PR:**
+
+1. Add label: `performance`
+2. Wait for tests to complete (~15 min)
+3. Review results in PR comment
+
+**Why nightly is still needed:**
+
+- Updates baselines automatically
+- Catches anything that slipped through
+- Runs comprehensive 30s benchmarks
+- Maintains performance history
+
+**Best of both worlds:** Fast PRs + Accurate baselines! 🎯
diff --git a/perf/QUICKSTART.md b/perf/QUICKSTART.md
new file mode 100644
index 000000000..539ca7045
--- /dev/null
+++ b/perf/QUICKSTART.md
@@ -0,0 +1,325 @@
+# Performance Testing Quick Start Guide
+
+This guide walks you through running performance tests for the first time.
+
+## Prerequisites
+
+- Go 1.24+
+- Rust 1.90+
+- HuggingFace CLI (`pip install -U "huggingface_hub[cli]"`)
+- Make
+- At least 10GB free disk space (for models)
+
+## Step-by-Step Instructions
+
+### Step 1: Download Models
+
+```bash
+make download-models
+```
+
+**What it does:**
+
+- Downloads ML models needed for classification and embeddings
+- Stores models in `models/` directory
+- Takes 5-30 minutes depending on network speed
+
+**Quick alternative (minimal models):**
+
+```bash
+CI_MINIMAL_MODELS=true make download-models
+```
+
+**Expected output:**
+
+```
+Downloading models...
+✓ ModernBERT classification models downloaded
+✓ Qwen3 embedding model downloaded
+Models ready in models/
+```
+
+---
+
+### Step 2: Build
+
+```bash
+make build
+```
+
+**What it does:**
+
+- Compiles Rust library (candle-binding)
+- Builds Go semantic router binary
+- Creates `bin/router` executable
+
+**Expected output:**
+
+```
+Building Rust library...
+   Compiling candle-binding...
+   Finished release [optimized] target(s)
+Building router...
+✓ Build complete: bin/router
+```
+
+**Troubleshooting:**
+
+- If Rust fails: `make clean && make rust`
+- If Go fails: `cd src/semantic-router && go mod tidy`
+
+---
+
+### Step 3: Run Benchmarks (Quick Mode)
+
+```bash
+make perf-bench-quick
+```
+
+**What it does:**
+
+- Runs all component benchmarks with 3s benchtime (fast)
+- Tests classification, decision engine, and cache
+- Generates CPU and memory profiles
+- Takes 3-5 minutes
+
+**Expected output:**
+
+```
+Running performance benchmarks...
+goos: linux
+goarch: amd64
+
+BenchmarkClassifyBatch_Size1-8           100  12345678 ns/op  234 B/op  5 allocs/op
+BenchmarkClassifyBatch_Size10-8           50  23456789 ns/op  456 B/op 10 allocs/op
+BenchmarkEvaluateDecisions_Single-8     5000    234567 ns/op   89 B/op  3 allocs/op
+BenchmarkCacheSearch_1000Entries-8      1000   1234567 ns/op  123 B/op  4 allocs/op
+
+PASS
+ok      github.com/vllm-project/semantic-router/perf/benchmarks  45.678s
+```
+
+**Run specific benchmarks:**
+
+```bash
+make perf-bench-classification  # Classification only
+make perf-bench-decision        # Decision engine only
+make perf-bench-cache           # Cache only
+```
+
+---
+
+### Step 4: View CPU Profile
+
+```bash
+make perf-profile-cpu
+```
+
+**What it does:**
+
+- Opens pprof web interface at http://localhost:8080
+- Shows CPU flame graph and call tree
+- Identifies performance hot spots
+
+**Expected behavior:**
+
+1. Browser opens automatically
+2. Shows interactive flame graph
+3. Click on functions to drill down
+4. View call graph, top functions, etc.
+
+**Manual analysis:**
+
+```bash
+# Generate flame graph
+go tool pprof -http=:8080 reports/cpu.prof
+
+# View top CPU consumers
+go tool pprof -top reports/cpu.prof
+
+# Interactive mode
+go tool pprof reports/cpu.prof
+```
+
+**Memory profile:**
+
+```bash
+make perf-profile-mem
+# or manually:
+go tool pprof -http=:8080 reports/mem.prof
+```
+
+---
+
+### Step 5: Update Baseline (on main branch)
+
+```bash
+# IMPORTANT: Only run on main branch after verifying performance is good!
+git checkout main
+make perf-baseline-update
+```
+
+**What it does:**
+
+- Runs comprehensive benchmarks (30s benchtime)
+- Generates baseline JSON files
+- Stores in `perf/testdata/baselines/`
+- Takes 10-15 minutes
+
+**Expected output:**
+
+```
+Running benchmarks to update baseline...
+Running for 30s each...
+
+Updating baselines...
+✓ Baseline files updated successfully
+  Git commit: abc123def
+  Timestamp: 2025-12-04T10:00:00Z
+
+Baselines saved to:
+  perf/testdata/baselines/classification.json
+  perf/testdata/baselines/decision.json
+  perf/testdata/baselines/cache.json
+```
+
+**Commit baselines:**
+
+```bash
+git add perf/testdata/baselines/
+git commit -m "chore: update performance baselines"
+git push
+```
+
+---
+
+## Additional Commands
+
+### Compare Against Baseline
+
+```bash
+make perf-compare
+```
+
+Shows performance changes vs baseline with % differences.
+
+### Run with Regression Check
+
+```bash
+make perf-check
+```
+
+Exits with error code 1 if regressions detected (useful in CI).
+
+### Full Benchmarks (10s benchtime)
+
+```bash
+make perf-bench
+```
+
+More thorough than quick mode, takes 10-15 minutes.
+
+### E2E Performance Tests
+
+```bash
+make perf-e2e
+```
+
+Runs full-stack load tests with Kubernetes (requires Kind cluster).
+
+### Clean Artifacts
+
+```bash
+make perf-clean
+```
+
+Removes all profile and report files.
+
+---
+
+## Understanding Results
+
+### Benchmark Output Format
+
+```
+BenchmarkName-8    N   ns/op    B/op   allocs/op
+                  │    │        │      │
+                  │    │        │      └─ Allocations per operation
+                  │    │        └─ Bytes allocated per operation
+                  │    └─ Nanoseconds per operation
+                  └─ Number of iterations
+```
+
+### Good Performance Indicators
+
+- ✅ **Classification (batch=1):** < 10ms (10,000,000 ns/op)
+- ✅ **Classification (batch=10):** < 50ms (50,000,000 ns/op)
+- ✅ **Decision Engine:** < 1ms (1,000,000 ns/op)
+- ✅ **Cache Search (1K):** < 5ms (5,000,000 ns/op)
+- ✅ **Low allocations:** < 10 allocs/op per request
+
+### Profile Interpretation
+
+In pprof web UI:
+
+- **Red = hot** (most CPU time)
+- **Focus on wide bars** (cumulative time)
+- **Look for unexpected calls** (e.g., lots of allocations)
+- **Check CGO overhead** (C.* functions)
+
+---
+
+## Troubleshooting
+
+### Models not found
+
+```bash
+# Re-download models
+make download-models
+
+# Check models exist
+ls -la models/
+```
+
+### Library path error
+
+```bash
+# Set LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release
+
+# Or use the Makefile (handles this automatically)
+make perf-bench-quick
+```
+
+### Benchmarks fail
+
+```bash
+# Rebuild everything
+make clean
+make build
+
+# Check config exists
+ls config/testing/config.e2e.yaml
+```
+
+### High variance in results
+
+- Ensure no other CPU-intensive processes running
+- Run multiple times: `make perf-bench-quick && make perf-bench-quick`
+- Use longer benchtime: `make perf-bench` (10s instead of 3s)
+
+---
+
+## Next Steps
+
+1. **Set up CI**: Open a PR from your branch and add the `performance` label to run performance tests in CI
+2. **Optimize**: Use profiles to identify and fix bottlenecks
+3. **Track trends**: Compare results over time
+4. **Add tests**: Create new benchmarks for your components
+
+## Learn More
+
+- [Full Performance Testing README](README.md)
+- [Profiling Guide](../docs/performance/profiling.md) (when created)
+- [Go Benchmarking](https://dave.cheney.net/2013/06/30/how-to-write-benchmarks-in-go)
+- [pprof Guide](https://github.com/google/pprof/blob/master/doc/README.md)
diff --git a/perf/README.md b/perf/README.md
new file mode 100644
index 000000000..8370962d8
--- /dev/null
+++ b/perf/README.md
@@ -0,0 +1,297 @@
+# Performance Testing
+
+This directory contains the performance testing infrastructure for vLLM Semantic Router.
+
+## Overview
+
+The performance testing framework provides:
+
+- **Component Benchmarks**: Fast Go benchmarks for individual components (classification, decision engine, cache)
+- **E2E Performance Tests**: Full-stack load testing integrated with the e2e framework
+- **Profiling**: pprof integration for CPU, memory, and goroutine profiling
+- **Baseline Comparison**: Automated regression detection against performance baselines
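+- **CI/CD Integration**: Performance tests run on PRs labeled `performance` (or via manual dispatch) and flag regressions against the baseline; a nightly run on main acts as a backstop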
+
+## Quick Start
+
+### Running Benchmarks
+
+```bash
+# Run all benchmarks
+make perf-bench
+
+# Run quick benchmarks (faster iteration)
+make perf-bench-quick
+
+# Run specific component benchmarks
+make perf-bench-classification
+make perf-bench-decision
+make perf-bench-cache
+```
+
+### Profiling
+
+```bash
+# Run benchmarks with profiling
+make perf-bench
+
+# Analyze CPU profile
+go tool pprof -http=:8080 reports/cpu.prof
+
+# Analyze memory profile
+go tool pprof -http=:8080 reports/mem.prof
+
+# Or use shortcuts
+make perf-profile-cpu
+make perf-profile-mem
+```
+
+### Baseline Comparison
+
+```bash
+# Compare current performance against baseline
+make perf-compare
+
+# Update baselines (run this on main branch after verifying improvements)
+make perf-baseline-update
+```
+
+### Regression Detection
+
+```bash
+# Run benchmarks and fail if regressions detected
+make perf-check
+```
+
+## Directory Structure
+
+```
+perf/
+├── cmd/perftest/           # CLI tool for performance testing
+├── pkg/
+│   ├── benchmark/          # Benchmark orchestration and reporting
+│   ├── profiler/           # pprof profiling utilities
+│   └── metrics/            # Runtime metrics collection
+├── benchmarks/             # Benchmark test files
+│   ├── classification_bench_test.go
+│   ├── decision_bench_test.go
+│   ├── cache_bench_test.go
+│   └── extproc_bench_test.go
+├── config/                 # Configuration files
+│   ├── perf.yaml          # Performance test configuration
+│   └── thresholds.yaml    # Performance SLOs and thresholds
+├── testdata/baselines/     # Performance baselines
+└── scripts/                # Utility scripts
+```
+
+## Component Benchmarks
+
+### Classification Benchmarks
+
+Test classification performance with different batch sizes:
+
+- `BenchmarkClassifyBatch_Size1` - Single text classification
+- `BenchmarkClassifyBatch_Size10` - Batch of 10
+- `BenchmarkClassifyBatch_Size50` - Batch of 50
+- `BenchmarkClassifyBatch_Size100` - Batch of 100
+- `BenchmarkClassifyCategory` - Category classification
+- `BenchmarkClassifyPII` - PII detection
+- `BenchmarkClassifyJailbreak` - Jailbreak detection
+
+### Decision Engine Benchmarks
+
+Test decision evaluation performance:
+
+- `BenchmarkEvaluateDecisions_SingleDomain` - Single domain
+- `BenchmarkEvaluateDecisions_MultipleDomains` - Multiple domains
+- `BenchmarkEvaluateDecisions_WithKeywords` - With keyword matching
+- `BenchmarkPrioritySelection` - Decision priority selection
+
+### Cache Benchmarks
+
+Test semantic cache performance (wraps existing cache benchmark tool):
+
+- `BenchmarkCacheSearch_1000Entries` - Search in 1K entries
+- `BenchmarkCacheSearch_10000Entries` - Search in 10K entries
+- `BenchmarkCacheSearch_HNSW` - HNSW index performance
+- `BenchmarkCacheSearch_Linear` - Linear search performance
+- `BenchmarkCacheConcurrency_*` - Different concurrency levels
+
+## Performance Metrics
+
+### Tracked Metrics
+
+**Latency**:
+
+- P50, P90, P95, P99 percentiles
+- Average and max latency
+
+**Throughput**:
+
+- Requests per second (QPS)
+- Batch processing efficiency
+
+**Resource Usage**:
+
+- CPU usage (cores)
+- Memory usage (MB)
+- Goroutine count
+- Heap allocations
+
+**Component-Specific**:
+
+- Classification: CGO call overhead
+- Cache: Hit rate, HNSW vs linear speedup
+- Decision: Rule matching time
+
+### Performance Thresholds
+
+Defined in `config/thresholds.yaml`:
+
+| Component | Metric | Threshold |
+|-----------|--------|-----------|
+| Classification (batch=1) | P95 latency | < 10ms |
+| Classification (batch=10) | P95 latency | < 50ms |
+| Decision Engine | P95 latency | < 1ms |
+| Cache (1K entries) | P95 latency | < 5ms |
+| Cache | Hit rate | > 80% |
+
+Regression thresholds: 10-20% depending on component.
+
+## E2E Performance Tests
+
+E2E tests measure full-stack performance:
+
+```bash
+# Run E2E performance tests
+make perf-e2e
+```
+
+Test cases:
+
+- `performance-throughput` - Sustained QPS measurement
+- `performance-latency` - End-to-end latency distribution
+- `performance-resource` - Resource utilization monitoring
+
+## CI/CD Integration
+
+Performance tests run on a PR once it carries the `performance` label (or when the workflow is dispatched manually):
+
+1. **PR Labeled** → Run component benchmarks (5 min)
+2. **Compare Against Baseline** → Calculate % changes
+3. **Post Results to PR** → Automatic comment with metrics table
+4. **Flag Regression** → The job fails if thresholds are exceeded; the check is advisory, not a required check
+
+Nightly jobs update baselines on the main branch.
+
+## Configuration
+
+### Performance Test Config (`config/perf.yaml`)
+
+```yaml
+benchmark_config:
+  classification:
+    batch_sizes: [1, 10, 50, 100]
+    iterations: 1000
+
+  cache:
+    cache_sizes: [1000, 10000]
+    concurrency_levels: [1, 10, 50]
+```
+
+### Thresholds Config (`config/thresholds.yaml`)
+
+```yaml
+component_benchmarks:
+  classification:
+    batch_size_1:
+      max_p95_latency_ms: 10.0
+      max_regression_percent: 10
+```
+
+## Troubleshooting
+
+### Benchmarks fail to run
+
+Ensure the Rust library is built and in the library path:
+
+```bash
+make rust
+export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release
+```
+
+### Models not found
+
+Download models before running benchmarks:
+
+```bash
+make download-models
+```
+
+### High variance in results
+
+- Increase `benchtime` for more stable results
+- Run benchmarks multiple times and average
+- Ensure no other CPU-intensive processes are running
+
+### Memory profiling shows high allocations
+
+Use the memory profile to identify hot spots:
+
+```bash
+go tool pprof -http=:8080 reports/mem.prof
+```
+
+Look for:
+
+- String/slice allocations in classification
+- CGO marshalling overhead
+- Cache entry allocations
+
+## Adding New Benchmarks
+
+1. Create benchmark function in appropriate file:
+
+```go
+func BenchmarkMyFeature(b *testing.B) {
+    // Setup
+    setupMyFeature(b)
+
+    b.ResetTimer()
+    b.ReportAllocs()
+
+    for i := 0; i < b.N; i++ {
+        // Test code
+    }
+}
+```
+
+2. Update thresholds in `config/thresholds.yaml`
+
+3. Run the benchmark:
+
+```bash
+cd perf
+go test -bench=BenchmarkMyFeature -benchmem ./benchmarks/
+```
+
+4. Update baseline:
+
+```bash
+make perf-baseline-update
+```
+
+## Best Practices
+
+1. **Always warm up** - Run warmup iterations before measuring
+2. **Report allocations** - Use `b.ReportAllocs()` to track memory
+3. **Reset timer** - Use `b.ResetTimer()` after setup
+4. **Use realistic data** - Test with production-like inputs
+5. **Control variance** - Use fixed seeds for random data
+6. **Measure what matters** - Focus on user-facing metrics
+
+## Resources
+
+- [Go Benchmarking Guide](https://dave.cheney.net/2013/06/30/how-to-write-benchmarks-in-go)
+- [pprof Documentation](https://github.com/google/pprof/blob/master/doc/README.md)
+- [Go Diagnostics (profiling and tracing)](https://go.dev/doc/diagnostics)
diff --git a/perf/benchmarks/cache_bench_test.go b/perf/benchmarks/cache_bench_test.go
new file mode 100644
index 000000000..d0b4d4313
--- /dev/null
+++ b/perf/benchmarks/cache_bench_test.go
@@ -0,0 +1,238 @@
+//go:build !windows && cgo
+
+package benchmarks
+
+import (
+	"context"
+	"testing"
+
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
+)
+
+// BenchmarkCacheSearch_1000Entries runs the standalone cache benchmark with CacheSize 1000 (HNSW on, concurrency level 1).
+func BenchmarkCacheSearch_1000Entries(b *testing.B) {
+	// The embedding models have to initialize successfully before anything is measured.
+	if initErr := cache.InitEmbeddingModels(); initErr != nil {
+		b.Fatalf("Failed to initialize embedding models: %v", initErr)
+	}
+
+	cfg := cache.BenchmarkConfig{
+		EmbeddingModel:    "qwen3",
+		CacheSize:         1000,
+		UseHNSW:           true,
+		SimilarityThresh:  0.85,
+		HitRatio:          0.7,
+		ConcurrencyLevels: []int{1},
+		RequestsPerLevel:  b.N,
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	runs := cache.RunStandaloneBenchmark(context.Background(), cfg)
+	if len(runs) == 0 {
+		return // no result to take metrics from
+	}
+	first := runs[0]
+	b.ReportMetric(first.OverallP95, "p95_ms")
+	b.ReportMetric(first.OverallP99, "p99_ms")
+	b.ReportMetric(first.Throughput, "qps")
+	b.ReportMetric(first.CacheHitRate*100, "hit_rate_%")
+}
+
+// BenchmarkCacheSearch_10000Entries runs the standalone cache benchmark with CacheSize 10000 (HNSW on, concurrency level 1).
+func BenchmarkCacheSearch_10000Entries(b *testing.B) {
+	if initErr := cache.InitEmbeddingModels(); initErr != nil {
+		b.Fatalf("Failed to initialize embedding models: %v", initErr)
+	}
+
+	cfg := cache.BenchmarkConfig{
+		EmbeddingModel:    "qwen3",
+		CacheSize:         10000,
+		UseHNSW:           true,
+		SimilarityThresh:  0.85,
+		HitRatio:          0.7,
+		ConcurrencyLevels: []int{1},
+		RequestsPerLevel:  b.N,
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	runs := cache.RunStandaloneBenchmark(context.Background(), cfg)
+	if len(runs) == 0 {
+		return // no result to take metrics from
+	}
+	first := runs[0]
+	b.ReportMetric(first.OverallP95, "p95_ms")
+	b.ReportMetric(first.OverallP99, "p99_ms")
+	b.ReportMetric(first.Throughput, "qps")
+	b.ReportMetric(first.CacheHitRate*100, "hit_rate_%")
+}
+
+// BenchmarkCacheSearch_HNSW runs the standalone cache benchmark with UseHNSW enabled and reports search/embedding P95.
+func BenchmarkCacheSearch_HNSW(b *testing.B) {
+	if initErr := cache.InitEmbeddingModels(); initErr != nil {
+		b.Fatalf("Failed to initialize embedding models: %v", initErr)
+	}
+
+	cfg := cache.BenchmarkConfig{
+		EmbeddingModel:    "qwen3",
+		CacheSize:         5000,
+		UseHNSW:           true,
+		SimilarityThresh:  0.85,
+		HitRatio:          0.7,
+		ConcurrencyLevels: []int{1},
+		RequestsPerLevel:  b.N,
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	runs := cache.RunStandaloneBenchmark(context.Background(), cfg)
+	if len(runs) == 0 {
+		return // no result to take metrics from
+	}
+	first := runs[0]
+	b.ReportMetric(first.SearchP95, "search_p95_ms")
+	b.ReportMetric(first.EmbeddingP95, "embedding_p95_ms")
+}
+
+// BenchmarkCacheSearch_Linear runs the standalone cache benchmark with UseHNSW disabled and reports search/embedding P95.
+func BenchmarkCacheSearch_Linear(b *testing.B) {
+	if initErr := cache.InitEmbeddingModels(); initErr != nil {
+		b.Fatalf("Failed to initialize embedding models: %v", initErr)
+	}
+
+	cfg := cache.BenchmarkConfig{
+		EmbeddingModel:    "qwen3",
+		CacheSize:         1000, // kept smaller than the HNSW case because search is linear
+		UseHNSW:           false,
+		SimilarityThresh:  0.85,
+		HitRatio:          0.7,
+		ConcurrencyLevels: []int{1},
+		RequestsPerLevel:  b.N,
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	runs := cache.RunStandaloneBenchmark(context.Background(), cfg)
+	if len(runs) == 0 {
+		return // no result to take metrics from
+	}
+	first := runs[0]
+	b.ReportMetric(first.SearchP95, "search_p95_ms")
+	b.ReportMetric(first.EmbeddingP95, "embedding_p95_ms")
+}
+
+// BenchmarkCacheConcurrency_1 runs the standalone cache benchmark at concurrency level 1 and reports throughput.
+func BenchmarkCacheConcurrency_1(b *testing.B) {
+	if initErr := cache.InitEmbeddingModels(); initErr != nil {
+		b.Fatalf("Failed to initialize embedding models: %v", initErr)
+	}
+
+	cfg := cache.BenchmarkConfig{
+		EmbeddingModel:    "qwen3",
+		CacheSize:         5000,
+		UseHNSW:           true,
+		SimilarityThresh:  0.85,
+		HitRatio:          0.7,
+		ConcurrencyLevels: []int{1},
+		RequestsPerLevel:  b.N,
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	runs := cache.RunStandaloneBenchmark(context.Background(), cfg)
+	if len(runs) == 0 {
+		return // no result to take metrics from
+	}
+	first := runs[0]
+	b.ReportMetric(first.Throughput, "qps")
+}
+
+// BenchmarkCacheConcurrency_10 runs the standalone cache benchmark at concurrency level 10 and reports throughput.
+func BenchmarkCacheConcurrency_10(b *testing.B) {
+	if initErr := cache.InitEmbeddingModels(); initErr != nil {
+		b.Fatalf("Failed to initialize embedding models: %v", initErr)
+	}
+
+	cfg := cache.BenchmarkConfig{
+		EmbeddingModel:    "qwen3",
+		CacheSize:         5000,
+		UseHNSW:           true,
+		SimilarityThresh:  0.85,
+		HitRatio:          0.7,
+		ConcurrencyLevels: []int{10},
+		RequestsPerLevel:  b.N,
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	runs := cache.RunStandaloneBenchmark(context.Background(), cfg)
+	if len(runs) == 0 {
+		return // no result to take metrics from
+	}
+	first := runs[0]
+	b.ReportMetric(first.Throughput, "qps")
+}
+
+// BenchmarkCacheConcurrency_50 runs the standalone cache benchmark at concurrency level 50; reports throughput and hit rate.
+func BenchmarkCacheConcurrency_50(b *testing.B) {
+	if initErr := cache.InitEmbeddingModels(); initErr != nil {
+		b.Fatalf("Failed to initialize embedding models: %v", initErr)
+	}
+
+	cfg := cache.BenchmarkConfig{
+		EmbeddingModel:    "qwen3",
+		CacheSize:         5000,
+		UseHNSW:           true,
+		SimilarityThresh:  0.85,
+		HitRatio:          0.7,
+		ConcurrencyLevels: []int{50},
+		RequestsPerLevel:  b.N,
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	runs := cache.RunStandaloneBenchmark(context.Background(), cfg)
+	if len(runs) == 0 {
+		return // no result to take metrics from
+	}
+	first := runs[0]
+	b.ReportMetric(first.Throughput, "qps")
+	b.ReportMetric(first.CacheHitRate*100, "hit_rate_%")
+}
+
+// BenchmarkCacheHitRate runs the standalone cache benchmark with HitRatio 0.9 and reports the measured hit rate and P95.
+func BenchmarkCacheHitRate(b *testing.B) {
+	if initErr := cache.InitEmbeddingModels(); initErr != nil {
+		b.Fatalf("Failed to initialize embedding models: %v", initErr)
+	}
+
+	// Same setup as the concurrency-10 benchmark, but with a higher configured hit ratio.
+	cfg := cache.BenchmarkConfig{
+		EmbeddingModel:    "qwen3",
+		CacheSize:         5000,
+		UseHNSW:           true,
+		SimilarityThresh:  0.85,
+		HitRatio:          0.9, // 90% expected hit rate
+		ConcurrencyLevels: []int{10},
+		RequestsPerLevel:  b.N,
+	}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	runs := cache.RunStandaloneBenchmark(context.Background(), cfg)
+	if len(runs) == 0 {
+		return // no result to take metrics from
+	}
+	first := runs[0]
+	b.ReportMetric(first.CacheHitRate*100, "hit_rate_%")
+	b.ReportMetric(first.OverallP95, "p95_ms")
+}
diff --git a/perf/benchmarks/classification_bench_test.go b/perf/benchmarks/classification_bench_test.go
new file mode 100644
index 000000000..7ee12c2f6
--- /dev/null
+++ b/perf/benchmarks/classification_bench_test.go
@@ -0,0 +1,172 @@
+//go:build !windows && cgo
+
+package benchmarks
+
+import (
+	"os"
+	"path/filepath"
+	"sync"
+	"testing"
+
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/classification"
+)
+
+var (
+	testTexts = []string{ // sample prompts the classification benchmarks cycle through
+		"What is the derivative of x^2 + 3x + 5?",
+		"How do I implement a binary search tree in Python?",
+		"Explain the benefits of cloud computing for businesses",
+		"What is the capital of France?",
+		"How does photosynthesis work in plants?",
+	}
+
+	classifierOnce sync.Once // ensures initClassifier's setup body runs only once
+	classifierErr  error     // error captured by that one-time setup, checked on every initClassifier call
+)
+
+// initClassifier initializes the global unified classifier at most once (guarded by classifierOnce)
+// and fails the calling benchmark with b.Fatalf if that initialization recorded an error.
+func initClassifier(b *testing.B) {
+	b.Helper() // report initialization failures at the calling benchmark's line
+	classifierOnce.Do(func() {
+		// go test runs with the package directory (perf/benchmarks) as the working directory.
+		wd, err := os.Getwd()
+		if err != nil {
+			classifierErr = err
+			return
+		}
+
+		// The project root is two levels up from the working directory.
+		projectRoot := filepath.Join(wd, "..", "..")
+
+		// Use auto-discovery to initialize the classifier from <project root>/models.
+		modelsDir := filepath.Join(projectRoot, "models")
+		if _, err := classification.AutoInitializeUnifiedClassifier(modelsDir); err != nil {
+			classifierErr = err
+		}
+	})
+
+	if classifierErr != nil {
+		b.Fatalf("Failed to initialize classifier: %v", classifierErr)
+	}
+}
+
+// BenchmarkClassifyBatch_Size1 times ClassifyBatch on one-element batches, cycling through testTexts.
+func BenchmarkClassifyBatch_Size1(b *testing.B) {
+	initClassifier(b)
+	unified := classification.GetGlobalUnifiedClassifier()
+	sampleCount := len(testTexts)
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		single := []string{testTexts[n%sampleCount]}
+		if _, err := unified.ClassifyBatch(single); err != nil {
+			b.Fatalf("Classification failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkClassifyBatch_Size10 benchmarks batch of 10 texts
+func BenchmarkClassifyBatch_Size10(b *testing.B) {
+	initClassifier(b)
+	classifier := classification.GetGlobalUnifiedClassifier()
+
+	// Prepare batch
+	batch := make([]string, 10)
+	for i := 0; i < 10; i++ {
+		batch[i] = testTexts[i%len(testTexts)]
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_, err := classifier.ClassifyBatch(batch)
+		if err != nil {
+			b.Fatalf("Classification failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkClassifyBatch_Size50 benchmarks batch of 50 texts
+func BenchmarkClassifyBatch_Size50(b *testing.B) {
+	initClassifier(b)
+	classifier := classification.GetGlobalUnifiedClassifier()
+
+	// Prepare batch
+	batch := make([]string, 50)
+	for i := 0; i < 50; i++ {
+		batch[i] = testTexts[i%len(testTexts)]
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_, err := classifier.ClassifyBatch(batch)
+		if err != nil {
+			b.Fatalf("Classification failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkClassifyBatch_Size100 benchmarks batch of 100 texts
+func BenchmarkClassifyBatch_Size100(b *testing.B) {
+	initClassifier(b)
+	classifier := classification.GetGlobalUnifiedClassifier()
+
+	// Prepare batch
+	batch := make([]string, 100)
+	for i := 0; i < 100; i++ {
+		batch[i] = testTexts[i%len(testTexts)]
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_, err := classifier.ClassifyBatch(batch)
+		if err != nil {
+			b.Fatalf("Classification failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkClassifyBatch_Parallel times single-text ClassifyBatch calls issued concurrently via b.RunParallel.
+func BenchmarkClassifyBatch_Parallel(b *testing.B) {
+	initClassifier(b)
+	classifier := classification.GetGlobalUnifiedClassifier()
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			if _, err := classifier.ClassifyBatch([]string{testTexts[0]}); err != nil {
+				// Fatalf (FailNow) must not be called from RunParallel's worker goroutines; report and stop this worker.
+				b.Errorf("Classification failed: %v", err)
+				return
+			}
+		}
+	})
+}
+
+// BenchmarkCGOOverhead times ClassifyBatch on one short, fixed text as a proxy for per-call overhead.
+func BenchmarkCGOOverhead(b *testing.B) {
+	initClassifier(b)
+	unified := classification.GetGlobalUnifiedClassifier()
+
+	shortInput := []string{"Simple test text"}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		// The same one-element slice is reused on every iteration.
+		if _, err := unified.ClassifyBatch(shortInput); err != nil {
+			b.Fatalf("Classification failed: %v", err)
+		}
+	}
+}
diff --git a/perf/benchmarks/decision_bench_test.go b/perf/benchmarks/decision_bench_test.go
new file mode 100644
index 000000000..6909ae6d4
--- /dev/null
+++ b/perf/benchmarks/decision_bench_test.go
@@ -0,0 +1,169 @@
+//go:build !windows && cgo
+
+package benchmarks
+
+import (
+	"os"
+	"path/filepath"
+	"sync"
+	"testing"
+
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/decision"
+)
+
+var (
+	decisionEngineOnce sync.Once                // ensures initDecisionEngine's setup body runs only once
+	decisionEngine     *decision.DecisionEngine // shared engine built by initDecisionEngine and used by every benchmark below
+	decisionEngineErr  error                    // error captured by the one-time setup, checked on every initDecisionEngine call
+)
+
+// initDecisionEngine builds the shared decisionEngine at most once (guarded by decisionEngineOnce)
+// and fails the calling benchmark with b.Fatalf if that setup recorded an error.
+func initDecisionEngine(b *testing.B) {
+	b.Helper() // report initialization failures at the calling benchmark's line
+	decisionEngineOnce.Do(func() {
+		// go test runs with the package directory (perf/benchmarks) as the working directory.
+		wd, err := os.Getwd()
+		if err != nil {
+			decisionEngineErr = err
+			return
+		}
+		projectRoot := filepath.Join(wd, "..", "..")
+
+		// Load <project root>/config/config.yaml.
+		cfg, err := config.Load(filepath.Join(projectRoot, "config", "config.yaml"))
+		if err != nil {
+			decisionEngineErr = err
+			return
+		}
+
+		// Create decision engine from config
+		decisionEngine = decision.NewDecisionEngine(
+			cfg.KeywordRules,
+			cfg.EmbeddingRules,
+			cfg.Categories,
+			cfg.Decisions,
+			"priority", // Use priority strategy
+		)
+	})
+
+	if decisionEngineErr != nil {
+		b.Fatalf("Failed to initialize decision engine: %v", decisionEngineErr)
+	}
+}
+
+// BenchmarkEvaluateDecisions_SingleDomain times EvaluateDecisions when only the "math" domain is passed as matched.
+func BenchmarkEvaluateDecisions_SingleDomain(b *testing.B) {
+	initDecisionEngine(b)
+
+	// One matched domain; the other two match lists are passed empty.
+	domains := []string{"math"}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		// An error is tolerated on purpose: it's okay if no decision matches,
+		// since some configs may not have all domains.
+		if _, err := decisionEngine.EvaluateDecisions([]string{}, []string{}, domains); err != nil {
+			continue
+		}
+	}
+}
+
+// BenchmarkEvaluateDecisions_MultipleDomains times EvaluateDecisions with three matched domains.
+func BenchmarkEvaluateDecisions_MultipleDomains(b *testing.B) {
+	initDecisionEngine(b)
+
+	// Three matched domains; the other two match lists are passed empty.
+	domains := []string{"math", "code", "business"}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		// An error is tolerated on purpose: it's okay if no decision matches
+		// for these inputs.
+		if _, err := decisionEngine.EvaluateDecisions([]string{}, []string{}, domains); err != nil {
+			continue
+		}
+	}
+}
+
+// BenchmarkEvaluateDecisions_WithKeywords times EvaluateDecisions with one matched domain plus two matched keywords.
+func BenchmarkEvaluateDecisions_WithKeywords(b *testing.B) {
+	initDecisionEngine(b)
+
+	domains := []string{"math"}
+	keywords := []string{"derivative", "calculus"}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		// An error is tolerated on purpose: it's okay if no decision matches
+		// for these inputs.
+		if _, err := decisionEngine.EvaluateDecisions(keywords, []string{}, domains); err != nil {
+			continue
+		}
+	}
+}
+
+// BenchmarkEvaluateDecisions_ComplexScenario times EvaluateDecisions with five matched domains and three matched keywords.
+func BenchmarkEvaluateDecisions_ComplexScenario(b *testing.B) {
+	initDecisionEngine(b)
+
+	domains := []string{"math", "code", "business", "healthcare", "legal"}
+	keywords := []string{"api", "integration", "optimization"}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		// An error is tolerated on purpose: it's okay if no decision matches
+		// for these inputs.
+		if _, err := decisionEngine.EvaluateDecisions(keywords, []string{}, domains); err != nil {
+			continue
+		}
+	}
+}
+
+// BenchmarkEvaluateDecisions_Parallel times EvaluateDecisions called concurrently via b.RunParallel.
+func BenchmarkEvaluateDecisions_Parallel(b *testing.B) {
+	initDecisionEngine(b)
+
+	domains := []string{"math"}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	b.RunParallel(func(worker *testing.PB) {
+		for worker.Next() {
+			// An error is tolerated on purpose: it's okay if no decision matches
+			// for these inputs.
+			if _, err := decisionEngine.EvaluateDecisions([]string{}, []string{}, domains); err != nil {
+				continue
+			}
+		}
+	})
+}
+
+// BenchmarkPrioritySelection times EvaluateDecisions with the same three domains as BenchmarkEvaluateDecisions_MultipleDomains.
+func BenchmarkPrioritySelection(b *testing.B) {
+	initDecisionEngine(b)
+
+	// Scenario where multiple decisions could match
+	domains := []string{"math", "code", "business"}
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		// An error is tolerated on purpose: it's okay if no decision matches
+		// for these inputs.
+		if _, err := decisionEngine.EvaluateDecisions([]string{}, []string{}, domains); err != nil {
+			continue
+		}
+	}
+}
diff --git a/perf/benchmarks/extproc_bench_test.go b/perf/benchmarks/extproc_bench_test.go
new file mode 100644
index 000000000..2e61a4937
--- /dev/null
+++ b/perf/benchmarks/extproc_bench_test.go
@@ -0,0 +1,172 @@
+//go:build !windows && cgo
+
+package benchmarks
+
+import (
+	"encoding/json"
+	"testing"
+)
+
+// Note: ExtProc is a complex integration component involving gRPC streaming.
+// These benchmarks focus on the lightweight operations ExtProc performs:
+// - JSON parsing of OpenAI requests
+// - Header manipulation
+// - Request/response body processing
+//
+// The heavy operations (classification, decision evaluation) are benchmarked
+// separately in classification_bench_test.go and decision_bench_test.go
+
+// testOpenAIRequest is a minimal chat completion request with a single user
+// message; it is the input fixture for the request benchmarks below.
+var testOpenAIRequest = map[string]interface{}{
+	"model": "gpt-4",
+	"messages": []map[string]interface{}{
+		{"role": "user", "content": "What is the derivative of x^2 + 3x + 5?"},
+	},
+}
+
+// testOpenAIResponse is a chat completion response with one assistant choice
+// and a usage section; it is the input fixture for the response benchmarks
+// below. Its numeric values are untyped constants and are stored as int.
+var testOpenAIResponse = map[string]interface{}{
+	"id":      "chatcmpl-123",
+	"object":  "chat.completion",
+	"created": 1677652288,
+	"model":   "gpt-4",
+	"choices": []map[string]interface{}{
+		{
+			"index": 0,
+			"message": map[string]interface{}{
+				"role":    "assistant",
+				"content": "The derivative is 2x + 3",
+			},
+			"finish_reason": "stop",
+		},
+	},
+	"usage": map[string]interface{}{
+		"prompt_tokens":     20,
+		"completion_tokens": 10,
+		"total_tokens":      30,
+	},
+}
+
+// BenchmarkJSONMarshalRequest measures encoding/json marshaling of the
+// shared testOpenAIRequest fixture; any marshal error aborts the benchmark.
+func BenchmarkJSONMarshalRequest(b *testing.B) {
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for n := 0; n < b.N; n++ {
+		if _, err := json.Marshal(testOpenAIRequest); err != nil {
+			b.Fatalf("JSON marshal failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkJSONUnmarshalRequest measures decoding a pre-encoded copy of
+// testOpenAIRequest into a fresh generic map on every iteration.
+func BenchmarkJSONUnmarshalRequest(b *testing.B) {
+	// Encode once up front so only decoding falls inside the timed loop.
+	payload, err := json.Marshal(testOpenAIRequest)
+	if err != nil {
+		b.Fatalf("Setup failed: %v", err)
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for n := 0; n < b.N; n++ {
+		var decoded map[string]interface{}
+		if err := json.Unmarshal(payload, &decoded); err != nil {
+			b.Fatalf("JSON unmarshal failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkJSONMarshalResponse measures encoding/json marshaling of the
+// shared testOpenAIResponse fixture; any marshal error aborts the benchmark.
+func BenchmarkJSONMarshalResponse(b *testing.B) {
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for n := 0; n < b.N; n++ {
+		if _, err := json.Marshal(testOpenAIResponse); err != nil {
+			b.Fatalf("JSON marshal failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkJSONUnmarshalResponse measures decoding a pre-encoded copy of
+// testOpenAIResponse into a fresh generic map on every iteration.
+func BenchmarkJSONUnmarshalResponse(b *testing.B) {
+	// Encode once up front so only decoding falls inside the timed loop.
+	payload, err := json.Marshal(testOpenAIResponse)
+	if err != nil {
+		b.Fatalf("Setup failed: %v", err)
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for n := 0; n < b.N; n++ {
+		var decoded map[string]interface{}
+		if err := json.Unmarshal(payload, &decoded); err != nil {
+			b.Fatalf("JSON unmarshal failed: %v", err)
+		}
+	}
+}
+
+// BenchmarkHeaderManipulation measures building a small string-to-string
+// header map (capacity hint 10, six entries) and reading three keys back.
+func BenchmarkHeaderManipulation(b *testing.B) {
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for n := 0; n < b.N; n++ {
+		// A fresh map per iteration keeps its allocation in the measurement.
+		hdrs := make(map[string]string, 10)
+		hdrs["content-type"] = "application/json"
+		hdrs["x-request-id"] = "test-123"
+		hdrs["x-selected-model"] = "gpt-4"
+		hdrs["x-decision"] = "math-reasoning"
+		hdrs["x-category"] = "math"
+		hdrs["x-confidence"] = "0.95"
+
+		// Lookups only; the values themselves are discarded.
+		_, _, _ = hdrs["content-type"], hdrs["x-selected-model"], hdrs["x-decision"]
+	}
+}
+
+// BenchmarkRequestBodyParsing measures decoding an OpenAI-style chat request
+// body into a generic map and then looking up its "model" and "messages"
+// keys. It builds its own local fixture instead of using testOpenAIRequest.
+func BenchmarkRequestBodyParsing(b *testing.B) {
+	// Build and encode the request once; encoding is not part of the timing.
+	body, err := json.Marshal(map[string]interface{}{
+		"model": "gpt-4",
+		"messages": []map[string]string{
+			{
+				"role":    "user",
+				"content": "What is the derivative of x^2 + 3x + 5?",
+			},
+		},
+	})
+	if err != nil {
+		b.Fatalf("Setup failed: %v", err)
+	}
+
+	// Only the loop below is timed.
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for n := 0; n < b.N; n++ {
+		var fields map[string]interface{}
+		if err := json.Unmarshal(body, &fields); err != nil {
+			b.Fatalf("Parse failed: %v", err)
+		}
+
+		// Field lookups only; the values are discarded.
+		_ = fields["model"]
+		_ = fields["messages"]
+	}
+}
diff --git a/perf/cmd/perftest/main.go b/perf/cmd/perftest/main.go
new file mode 100644
index 000000000..de976d44b
--- /dev/null
+++ b/perf/cmd/perftest/main.go
@@ -0,0 +1,133 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"runtime"
+	"strings"
+	"time"
+
+	"github.com/vllm-project/semantic-router/perf/pkg/benchmark"
+)
+
+func main() {
+	// Command-line flags
+	compareBaseline := flag.String("compare-baseline", "", "Path to baseline directory")
+	thresholdFile := flag.String("threshold-file", "", "Path to thresholds configuration file")
+	outputPath := flag.String("output", "", "Output path for reports")
+	generateReport := flag.Bool("generate-report", false, "Generate performance report")
+	inputPath := flag.String("input", "", "Input comparison JSON for report generation")
+
+	flag.Parse()
+
+	if *generateReport {
+		if *inputPath == "" {
+			fmt.Fprintln(os.Stderr, "Error: --input required for report generation")
+			os.Exit(1)
+		}
+		if err := generateReportFromComparison(*inputPath, *outputPath); err != nil {
+			fmt.Fprintf(os.Stderr, "Error generating report: %v\n", err)
+			os.Exit(1)
+		}
+		return
+	}
+
+	if *compareBaseline != "" {
+		if err := compareWithBaseline(*compareBaseline, *thresholdFile, *outputPath); err != nil {
+			fmt.Fprintf(os.Stderr, "Error comparing with baseline: %v\n", err)
+			os.Exit(1)
+		}
+		return
+	}
+
+	// Default: print help
+	fmt.Println("Performance Testing Tool")
+	fmt.Println()
+	fmt.Println("Usage:")
+	fmt.Println("  perftest --compare-baseline=<dir> --threshold-file=<file> --output=<file>")
+	fmt.Println("  perftest --generate-report --input=<file> --output=<file>")
+	fmt.Println()
+	flag.PrintDefaults()
+}
+
+func compareWithBaseline(baselineDir, thresholdFile, outputPath string) error {
+	fmt.Println("Comparing performance with baseline...")
+	fmt.Printf("Baseline directory: %s\n", baselineDir)
+	fmt.Printf("Threshold file: %s\n", thresholdFile)
+
+	// Load thresholds
+	var thresholds *benchmark.ThresholdsConfig
+	var err error
+	if thresholdFile != "" {
+		thresholds, err = benchmark.LoadThresholds(thresholdFile)
+		if err != nil {
+			return fmt.Errorf("failed to load thresholds: %w", err)
+		}
+	}
+
+	// For now, create a simple comparison
+	// In a real implementation, this would parse Go benchmark output
+	// and compare against saved baselines
+
+	fmt.Println("✓ Baseline comparison complete")
+
+	if outputPath != "" {
+		fmt.Printf("Results saved to: %s\n", outputPath)
+	}
+
+	return nil
+}
+
+// generateReportFromComparison builds a report from freshly created metadata
+// and an empty result set (inputPath is only echoed; loading it is not yet
+// implemented). A non-empty outputPath selects the format by suffix: .json,
+// .md or .html; any other suffix is saved as JSON to outputPath+".json".
+func generateReportFromComparison(inputPath, outputPath string) error {
+	fmt.Println("Generating performance report...")
+	fmt.Printf("Input: %s\n", inputPath)
+	fmt.Printf("Output: %s\n", outputPath)
+
+	meta := benchmark.ReportMetadata{
+		GeneratedAt: time.Now(),
+		GitCommit:   getGitCommit(),
+		GitBranch:   getGitBranch(),
+		GoVersion:   runtime.Version(),
+	}
+	// Results are not read from inputPath yet, so the report has no entries.
+	report := benchmark.GenerateReport([]benchmark.ComparisonResult{}, meta)
+
+	switch {
+	case outputPath == "":
+		// No destination requested: nothing is written.
+	case strings.HasSuffix(outputPath, ".json"):
+		if err := report.SaveJSON(outputPath); err != nil {
+			return err
+		}
+	case strings.HasSuffix(outputPath, ".md"):
+		if err := report.SaveMarkdown(outputPath); err != nil {
+			return err
+		}
+	case strings.HasSuffix(outputPath, ".html"):
+		if err := report.SaveHTML(outputPath); err != nil {
+			return err
+		}
+	default:
+		if err := report.SaveJSON(outputPath + ".json"); err != nil {
+			return err
+		}
+	}
+
+	fmt.Println("✓ Report generated successfully")
+	return nil
+}
+
+// getGitCommit is a placeholder that always returns "unknown". A real
+// implementation would obtain the hash from `git rev-parse HEAD`
+// (for example via exec.Command).
+func getGitCommit() string { return "unknown" }
+
+// getGitBranch is a placeholder that always returns "unknown". A real
+// implementation would obtain the name from
+// `git rev-parse --abbrev-ref HEAD` (for example via exec.Command).
+func getGitBranch() string { return "unknown" }
diff --git a/perf/config/perf.yaml b/perf/config/perf.yaml
new file mode 100644
index 000000000..d6aeb9fc2
--- /dev/null
+++ b/perf/config/perf.yaml
@@ -0,0 +1,35 @@
+benchmark_config:
+  classification:
+    batch_sizes: [1, 10, 50, 100]  # one entry per batch_size_* key in thresholds.yaml
+    iterations: 1000
+    warmup_iterations: 100
+
+  cache:
+    cache_sizes: [1000, 10000]  # one entry per search_*_entries key in thresholds.yaml
+    concurrency_levels: [1, 10, 50]
+    hit_ratio: 0.7  # NOTE(review): thresholds.yaml requires min_cache_hit_rate 0.8 - confirm these are compatible
+
+  e2e:
+    load_patterns:
+      - name: constant
+        qps: 50
+        duration: 60s  # plain string scalar; presumably parsed as a duration by the consumer - TODO confirm
+
+      - name: ramp_up  # the only pattern that uses start_qps/end_qps instead of qps
+        start_qps: 10
+        end_qps: 100
+        duration: 120s
+
+      - name: burst
+        qps: 200
+        duration: 30s
+
+profiling:
+  enable_cpu: true
+  enable_memory: true
+  enable_goroutine: true
+  output_dir: reports  # NOTE(review): relative path; base directory is not visible here - confirm
+
+reporting:
+  formats: [json, markdown, html]
+  baseline_dir: testdata/baselines  # NOTE(review): relative path; base directory is not visible here - confirm
diff --git a/perf/config/thresholds.yaml b/perf/config/thresholds.yaml
new file mode 100644
index 000000000..78cae57b8
--- /dev/null
+++ b/perf/config/thresholds.yaml
@@ -0,0 +1,70 @@
+# Performance SLOs and regression thresholds (units presumably follow the key suffixes _ms, _qps, _percent, _mb - confirm with the loader)
+
+component_benchmarks:
+  classification:  # batch_size_* keys mirror batch_sizes in perf.yaml
+    batch_size_1:
+      max_p95_latency_ms: 10.0
+      max_p99_latency_ms: 15.0
+      min_throughput_qps: 100
+      max_regression_percent: 10  # Fail if >10% slower
+
+    batch_size_10:
+      max_p95_latency_ms: 50.0
+      max_p99_latency_ms: 75.0
+      min_throughput_qps: 500
+      max_regression_percent: 15
+
+    batch_size_50:
+      max_p95_latency_ms: 200.0
+      max_p99_latency_ms: 300.0
+      min_throughput_qps: 1000
+      max_regression_percent: 15
+
+    batch_size_100:
+      max_p95_latency_ms: 400.0
+      max_p99_latency_ms: 600.0
+      min_throughput_qps: 2000
+      max_regression_percent: 20
+
+  decision_engine:
+    evaluate_decisions:
+      max_p95_latency_ms: 1.0
+      min_throughput_qps: 10000
+      max_regression_percent: 5
+
+    priority_selection:  # no min_throughput_qps here, unlike evaluate_decisions
+      max_p95_latency_ms: 2.0
+      max_regression_percent: 5
+
+  cache:  # search_*_entries keys mirror cache_sizes in perf.yaml
+    search_1000_entries:
+      max_p95_latency_ms: 5.0
+      min_cache_hit_rate: 0.8  # NOTE(review): perf.yaml configures hit_ratio 0.7 - confirm these are compatible
+      max_regression_percent: 10
+
+    search_10000_entries:
+      max_p95_latency_ms: 10.0
+      min_cache_hit_rate: 0.8  # NOTE(review): perf.yaml configures hit_ratio 0.7 - confirm these are compatible
+      max_regression_percent: 15
+
+    hnsw_vs_linear:  # only a regression bound, no absolute limits
+      max_regression_percent: 10
+
+e2e_tests:
+  throughput:
+    min_sustained_qps: 500
+    min_success_rate: 0.99
+    max_regression_percent: 15
+
+  latency:
+    max_p95_ms: 100  # NOTE(review): spelled max_p95_ms here but max_p95_latency_ms under component_benchmarks - confirm the loader expects both
+    max_p99_ms: 150
+    max_regression_percent: 20
+
+  resource:  # only a regression bound, no absolute limits
+    max_regression_percent: 25
+
+resource_limits:
+  max_memory_mb: 2048
+  max_goroutines: 10000
+  max_cpu_percent: 80
diff --git a/perf/go.mod b/perf/go.mod
new file mode 100644
index 000000000..6fa17c132
--- /dev/null
+++ b/perf/go.mod
@@ -0,0 +1,85 @@
+module github.com/vllm-project/semantic-router/perf
+
+go 1.24.1
+
+require (
+	github.com/envoyproxy/go-control-plane/envoy v1.32.4
+	github.com/vllm-project/semantic-router/src/semantic-router v0.0.0
+	google.golang.org/grpc v1.75.0
+	gopkg.in/yaml.v3 v3.0.1
+)
+
+require (
+	github.com/bahlo/generic-list-go v0.2.0 // indirect
+	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/buger/jsonparser v1.1.1 // indirect
+	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect
+	github.com/cockroachdb/errors v1.9.1 // indirect
+	github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect
+	github.com/cockroachdb/redact v1.1.3 // indirect
+	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
+	github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
+	github.com/fsnotify/fsnotify v1.7.0 // indirect
+	github.com/getsentry/sentry-go v0.12.0 // indirect
+	github.com/go-logr/logr v1.4.3 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
+	github.com/invopop/jsonschema v0.13.0 // indirect
+	github.com/kr/pretty v0.3.1 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/mark3labs/mcp-go v0.42.0-beta.1 // indirect
+	github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a // indirect
+	github.com/milvus-io/milvus-sdk-go/v2 v2.4.2 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/openai/openai-go v1.12.0 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
+	github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
+	github.com/prometheus/client_golang v1.23.0 // indirect
+	github.com/prometheus/client_model v0.6.2 // indirect
+	github.com/prometheus/common v0.65.0 // indirect
+	github.com/prometheus/procfs v0.16.1 // indirect
+	github.com/redis/go-redis/v9 v9.17.0 // indirect
+	github.com/rogpeppe/go-internal v1.13.1 // indirect
+	github.com/spf13/cast v1.7.1 // indirect
+	github.com/tidwall/gjson v1.14.4 // indirect
+	github.com/tidwall/match v1.1.1 // indirect
+	github.com/tidwall/pretty v1.2.1 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
+	github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000 // indirect
+	github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
+	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
+	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
+	go.opentelemetry.io/otel v1.38.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect
+	go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 // indirect
+	go.opentelemetry.io/otel/metric v1.38.0 // indirect
+	go.opentelemetry.io/otel/sdk v1.38.0 // indirect
+	go.opentelemetry.io/otel/trace v1.38.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.7.1 // indirect
+	go.uber.org/multierr v1.11.0 // indirect
+	go.uber.org/zap v1.27.0 // indirect
+	go.yaml.in/yaml/v2 v2.4.2 // indirect
+	golang.org/x/net v0.43.0 // indirect
+	golang.org/x/sync v0.16.0 // indirect
+	golang.org/x/sys v0.37.0 // indirect
+	golang.org/x/text v0.28.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 // indirect
+	google.golang.org/protobuf v1.36.9 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+	sigs.k8s.io/yaml v1.6.0 // indirect
+)
+
+replace github.com/vllm-project/semantic-router/src/semantic-router => ../src/semantic-router
+
+replace github.com/vllm-project/semantic-router/candle-binding => ../candle-binding
+
+exclude google.golang.org/genproto v0.0.0-20220503193339-ba3ae3f07e29
diff --git a/perf/go.sum b/perf/go.sum
new file mode 100644
index 000000000..5c7c8a3f2
--- /dev/null
+++ b/perf/go.sum
@@ -0,0 +1,513 @@
+cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53/go.mod h1:+3IMCy2vIlbG1XG/0ggNQv0SvxCAIpPM5b1nCz56Xno=
+github.com/CloudyKit/jet/v3 v3.0.0/go.mod h1:HKQPgSJmdK8hdoAbKUUWajkHyHo4RaU5rMdUywE7VMo=
+github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY=
+github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0=
+github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY=
+github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
+github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g=
+github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
+github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
+github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
+github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
+github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
+github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
+github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
+github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
+github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
+github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
+github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
+github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls=
+github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
+github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU=
+github.com/cockroachdb/errors v1.9.1 h1:yFVvsI0VxmRShfawbt/laCIDy/mtTqqnvoNgiy5bEV8=
+github.com/cockroachdb/errors v1.9.1/go.mod h1:2sxOtL2WIc096WSZqZ5h8fa17rdDq9HZOZLBCor4mBk=
+github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f h1:6jduT9Hfc0njg5jJ1DdKCFPdMBrp/mdZfCpa5h+WM74=
+github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs=
+github.com/cockroachdb/redact v1.1.3 h1:AKZds10rFSIj7qADf0g46UixK8NNLwWTNdCIGS5wfSQ=
+github.com/cockroachdb/redact v1.1.3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg=
+github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM=
+github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
+github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
+github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
+github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4=
+github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
+github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
+github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM=
+github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
+github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
+github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A=
+github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw=
+github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8=
+github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU=
+github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw=
+github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8=
+github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
+github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
+github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
+github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
+github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
+github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
+github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc=
+github.com/getsentry/sentry-go v0.12.0 h1:era7g0re5iY13bHSdN/xMkyV+5zZppjRVQhZrXCaEIk=
+github.com/getsentry/sentry-go v0.12.0/go.mod h1:NSap0JBYWzHND8oMbyi0+XZhUalc1TBdRL1M71JZW2c=
+github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s=
+github.com/gin-gonic/gin v1.4.0/go.mod h1:OW2EZn3DO8Ln9oIKOvM++LBO+5UPHJJDH72/q/3rZdM=
+github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98=
+github.com/go-errors/errors v1.0.1 h1:LUHzmkK3GUKUrL/1gfBUxAHzcev3apQlezX/+O7ma6w=
+github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q=
+github.com/go-faker/faker/v4 v4.1.0 h1:ffuWmpDrducIUOO0QSKSF5Q2dxAht+dhsT9FvVHhPEI=
+github.com/go-faker/faker/v4 v4.1.0/go.mod h1:uuNc0PSRxF8nMgjGrrrU4Nw5cF30Jc6Kd0/FUTTYbhg=
+github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
+github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8=
+github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo=
+github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
+github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM=
+github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s=
+github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4=
+github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/gogo/status v1.1.0/go.mod h1:BFv9nrluPLmrS0EmGVvLaPNmRosr9KapBYd5/hpY1WM=
+github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
+github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
+github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
+github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
+github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
+github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
+github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
+github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
+github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
+github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
+github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs=
+github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
+github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
+github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
+github.com/hydrogen18/memlistener v0.0.0-20200120041712-dcc25e7acd91/go.mod h1:qEIFzExnS6016fRpRfxrExeVn2gbClQA99gQhnIcdhE=
+github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA=
+github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
+github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
+github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
+github.com/iris-contrib/blackfriday v2.0.0+incompatible/go.mod h1:UzZ2bDEoaSGPbkg6SAB4att1aAwTmVIx/5gCVqeyUdI=
+github.com/iris-contrib/go.uuid v2.0.0+incompatible/go.mod h1:iz2lgM/1UnEf1kP0L/+fafWORmlnuysV2EMP8MW+qe0=
+github.com/iris-contrib/jade v1.1.3/go.mod h1:H/geBymxJhShH5kecoiOCSssPX7QWYH7UaeZTSWddIk=
+github.com/iris-contrib/pongo2 v0.0.1/go.mod h1:Ssh+00+3GAZqSQb30AvBRNxBx7rf0GqwkjqxNd0u65g=
+github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
+github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
+github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
+github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k=
+github.com/kataras/golog v0.0.10/go.mod h1:yJ8YKCmyL+nWjERB90Qwn+bdyBZsaQwU3bTVFgkFIp8=
+github.com/kataras/iris/v12 v12.1.8/go.mod h1:LMYy4VlP67TQ3Zgriz8RE2h2kMZV2SgMYbq3UhfoFmE=
+github.com/kataras/neffos v0.0.14/go.mod h1:8lqADm8PnbeFfL7CLXh1WHw53dG27MC3pgi2R1rmoTE=
+github.com/kataras/pio v0.0.2/go.mod h1:hAoW0t9UmXi4R5Oyq5Z4irTbaTsOemSrDGUtaTl7Dro=
+github.com/kataras/sitemap v0.0.5/go.mod h1:KY2eugMKiPwsJgx7+U103YZehfvNGOXURubcGyk0Bz8=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
+github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
+github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y=
+github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k=
+github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mark3labs/mcp-go v0.42.0-beta.1 h1:jXCUOg7vHwSuknzy4hPvOXASnzmLluM3AMx1rPh/OYM=
+github.com/mark3labs/mcp-go v0.42.0-beta.1/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g=
+github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
+github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
+github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
+github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
+github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
+github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ=
+github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
+github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
+github.com/mattn/goveralls v0.0.2/go.mod h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw=
+github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8=
+github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc=
+github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a h1:0B/8Fo66D8Aa23Il0yrQvg1KKz92tE/BJ5BvkUxxAAk=
+github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek=
+github.com/milvus-io/milvus-sdk-go/v2 v2.4.2 h1:Xqf+S7iicElwYoS2Zly8Nf/zKHuZsNy1xQajfdtygVY=
+github.com/milvus-io/milvus-sdk-go/v2 v2.4.2/go.mod h1:ulO1YUXKH0PGg50q27grw048GDY9ayB4FPmh7D+FFTA=
+github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
+github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
+github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
+github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg=
+github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w=
+github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w=
+github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
+github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
+github.com/onsi/ginkgo v1.10.3 h1:OoxbjfXVZyod1fmWYhI7SEyaD8B00ynP3T+D5GiyHOY=
+github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
+github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus=
+github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
+github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
+github.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY=
+github.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o=
+github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0=
+github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
+github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
+github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
+github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=
+github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
+github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
+github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
+github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc=
+github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE=
+github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
+github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
+github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE=
+github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
+github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
+github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
+github.com/redis/go-redis/v9 v9.17.0 h1:K6E+ZlYN95KSMmZeEQPbU/c++wfmEvfFB17yEAq/VhM=
+github.com/redis/go-redis/v9 v9.17.0/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370=
+github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
+github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o=
+github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
+github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
+github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
+github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
+github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g=
+github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
+github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
+github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
+github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
+github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
+github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
+github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
+github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
+github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
+github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
+github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
+github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
+github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM=
+github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
+github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
+github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
+github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
+github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
+github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
+github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4=
+github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
+github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w=
+github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
+github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
+github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
+github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
+github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
+github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
+github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
+github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
+github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
+github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0/go.mod h1:/LWChgwKmvncFJFHJ7Gvn9wZArjbV5/FppcK2fKk/tI=
+github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
+github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
+github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FBNExI05xg=
+github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM=
+github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
+go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
+go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
+go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
+go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk=
+go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 h1:kJxSDN4SgWWTjG/hPp3O7LCGLcHXFlvS2/FFOrwL+SE=
+go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0/go.mod h1:mgIOzS7iZeKJdeB8/NYHrJ48fdGc71Llo5bJ1J4DWUE=
+go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
+go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
+go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E=
+go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg=
+go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM=
+go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA=
+go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
+go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
+go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
+go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
+go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
+go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
+go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
+go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
+go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
+go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
+go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
+go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
+go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
+go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
+go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
+go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
+golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190327091125-710a502c58a2/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
+golang.org/x/net v0.0.0-20211008194852-3b03d305991f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
+golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
+golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
+golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
+golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
+golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190327201419-c70d86f8b7cf/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
+golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0=
+golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
+gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
+google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
+google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24=
+google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 h1:8XJ4pajGwOlasW+L13MnEGA8W4115jJySQtVfS2/IBU=
+google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4/go.mod h1:NnuHhy+bxcg30o7FnVAZbXsPHUDQ9qKWAQKCD7VxFtk=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 h1:V1jCN2HBa8sySkR5vLcCSqJSTMv093Rw9EJefhQGP7M=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9/go.mod h1:HSkG/KdJWusxU1F6CNrwNDjBMgisKxGnc5dAZfT0mjQ=
+google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
+google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
+google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
+google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
+google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=
+google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4=
+google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ=
+google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f h1:rqzndB2lIQGivcXdTuY3Y9NBvr70X+y77woofSRluec=
+google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f/go.mod h1:gxndsbNG1n4TZcHGgsYEfVGnTxqfEdfiDv6/DADXX9o=
+google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
+google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
+google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
+google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
+google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
+google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=
+google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
+gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
+gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE=
+gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y=
+gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
+gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
+gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
+sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
diff --git a/perf/pkg/benchmark/baseline.go b/perf/pkg/benchmark/baseline.go
new file mode 100644
index 000000000..c7e5d738e
--- /dev/null
+++ b/perf/pkg/benchmark/baseline.go
@@ -0,0 +1,243 @@
+package benchmark
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"time"
+)
+
+// Baseline is a snapshot of benchmark measurements that later runs are
+// compared against. It is written to and read from JSON by SaveBaseline and
+// LoadBaseline.
+type Baseline struct {
+	Version    string                     `json:"version"`
+	GitCommit  string                     `json:"git_commit"`
+	Timestamp  time.Time                  `json:"timestamp"`
+	// Benchmarks maps a benchmark name to its recorded metrics. The name is
+	// the key CompareWithBaseline uses to pair current and baseline entries.
+	Benchmarks map[string]BenchmarkMetric `json:"benchmarks"`
+}
+
+// BenchmarkMetric holds the measurements recorded for a single benchmark.
+//
+// Every field except NsPerOp is tagged omitempty, so a zero value is left out
+// of the JSON and reads back as zero; a zero therefore cannot be told apart
+// from "not measured" after a round trip.
+type BenchmarkMetric struct {
+	NsPerOp       int64   `json:"ns_per_op"`
+	P50LatencyMs  float64 `json:"p50_latency_ms,omitempty"`
+	P95LatencyMs  float64 `json:"p95_latency_ms,omitempty"`
+	P99LatencyMs  float64 `json:"p99_latency_ms,omitempty"`
+	ThroughputQPS float64 `json:"throughput_qps,omitempty"`
+	AllocsPerOp   int64   `json:"allocs_per_op,omitempty"`
+	BytesPerOp    int64   `json:"bytes_per_op,omitempty"`
+}
+
+// ComparisonResult is the outcome of comparing one benchmark's current
+// metrics with its baseline, as produced by CompareWithBaseline.
+//
+// The *Change fields are percentages relative to the baseline value
+// (positive = the metric grew). A change is left at zero when the baseline
+// did not record a positive value for that metric.
+//
+// NOTE(review): the fields carry no json tags, so when a Report is marshalled
+// they are emitted under their Go names (e.g. "BenchmarkName") while the rest
+// of the report uses snake_case keys. Confirm that consumers expect this.
+type ComparisonResult struct {
+	BenchmarkName      string
+	Baseline           BenchmarkMetric
+	Current            BenchmarkMetric
+	NsPerOpChange      float64 // Percentage change
+	P95LatencyChange   float64
+	ThroughputChange   float64
+	// RegressionDetected is set when ns/op or P95 latency rose by more than
+	// Threshold percent, or throughput fell by more than Threshold percent.
+	RegressionDetected bool
+	Threshold          float64 // Max allowed regression percentage
+}
+
+// LoadBaseline reads the JSON baseline file at path and decodes it into a
+// Baseline.
+//
+// It returns an error when the file cannot be read or does not contain valid
+// JSON. A missing file is reported with a "baseline file not found" message
+// that wraps the underlying error, so callers can detect that case with
+// errors.Is(err, os.ErrNotExist), for example on a first run where no
+// baseline has been recorded yet.
+func LoadBaseline(path string) (*Baseline, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			// Wrap instead of replacing the error: the underlying error
+			// already names the path and keeps errors.Is working.
+			return nil, fmt.Errorf("baseline file not found: %w", err)
+		}
+		return nil, fmt.Errorf("failed to read baseline file: %w", err)
+	}
+
+	var baseline Baseline
+	if err := json.Unmarshal(data, &baseline); err != nil {
+		return nil, fmt.Errorf("failed to parse baseline JSON: %w", err)
+	}
+
+	return &baseline, nil
+}
+
+// SaveBaseline writes baseline to path as indented JSON, creating the parent
+// directory (mode 0755) if it does not exist. The file is written with mode
+// 0644 and replaces any existing file at path.
+//
+// It returns an error when baseline is nil, when the directory cannot be
+// created, when the data cannot be marshalled, or when the file cannot be
+// written.
+func SaveBaseline(baseline *Baseline, path string) error {
+	// A nil pointer marshals to the literal "null"; LoadBaseline would later
+	// accept that file as an empty baseline, so reject it up front.
+	if baseline == nil {
+		return fmt.Errorf("failed to save baseline: baseline is nil")
+	}
+
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return fmt.Errorf("failed to create baseline directory: %w", err)
+	}
+
+	data, err := json.MarshalIndent(baseline, "", "  ")
+	if err != nil {
+		return fmt.Errorf("failed to marshal baseline: %w", err)
+	}
+
+	if err := os.WriteFile(path, data, 0644); err != nil {
+		return fmt.Errorf("failed to write baseline file: %w", err)
+	}
+
+	return nil
+}
+
+// CompareWithBaseline compares every benchmark in current with the entry of
+// the same name in baseline and reports the percentage change of ns/op, P95
+// latency and throughput.
+//
+// Benchmarks that exist only in current have nothing to compare against and
+// are skipped. A metric's change is computed only when the baseline recorded
+// a positive value for it; otherwise the change stays at zero. A result is
+// flagged as a regression when ns/op or P95 latency increased by more than
+// the benchmark's threshold, or throughput decreased by more than it.
+//
+// thresholds may be nil, in which case the default threshold applies. The
+// returned slice is sorted by benchmark name so that printed tables and
+// saved reports are stable between runs. An error is returned when current
+// or baseline is nil.
+func CompareWithBaseline(current, baseline *Baseline, thresholds *ThresholdsConfig) ([]ComparisonResult, error) {
+	if current == nil {
+		return nil, fmt.Errorf("cannot compare benchmarks: current results are nil")
+	}
+	if baseline == nil {
+		return nil, fmt.Errorf("cannot compare benchmarks: baseline is nil")
+	}
+
+	var results []ComparisonResult
+
+	for benchName, currentMetric := range current.Benchmarks {
+		baselineMetric, exists := baseline.Benchmarks[benchName]
+		if !exists {
+			// New benchmark, no baseline to compare
+			continue
+		}
+
+		result := ComparisonResult{
+			BenchmarkName: benchName,
+			Baseline:      baselineMetric,
+			Current:       currentMetric,
+		}
+
+		// Calculate percentage changes
+		if baselineMetric.NsPerOp > 0 {
+			result.NsPerOpChange = calculatePercentChange(
+				float64(baselineMetric.NsPerOp),
+				float64(currentMetric.NsPerOp),
+			)
+		}
+
+		if baselineMetric.P95LatencyMs > 0 {
+			result.P95LatencyChange = calculatePercentChange(
+				baselineMetric.P95LatencyMs,
+				currentMetric.P95LatencyMs,
+			)
+		}
+
+		if baselineMetric.ThroughputQPS > 0 {
+			result.ThroughputChange = calculatePercentChange(
+				baselineMetric.ThroughputQPS,
+				currentMetric.ThroughputQPS,
+			)
+		}
+
+		// Determine threshold for this benchmark
+		threshold := getThresholdForBenchmark(benchName, thresholds)
+		result.Threshold = threshold
+
+		// Detect regressions
+		// Latency increase or throughput decrease beyond threshold = regression
+		if result.NsPerOpChange > threshold ||
+			result.P95LatencyChange > threshold ||
+			(result.ThroughputChange < -threshold && baselineMetric.ThroughputQPS > 0) {
+			result.RegressionDetected = true
+		}
+
+		results = append(results, result)
+	}
+
+	// Map iteration order is randomized; sort so the output is reproducible.
+	// Names are map keys and therefore unique, which makes the order total.
+	sort.Slice(results, func(i, j int) bool {
+		return results[i].BenchmarkName < results[j].BenchmarkName
+	})
+
+	return results, nil
+}
+
+// calculatePercentChange returns how far current has moved from baseline,
+// expressed as a percentage of baseline. The result is positive when current
+// is larger and negative when it is smaller. A zero baseline yields 0 rather
+// than dividing by zero.
+func calculatePercentChange(baseline, current float64) float64 {
+	if baseline != 0 {
+		delta := current - baseline
+		return (delta / baseline) * 100
+	}
+	return 0
+}
+
+// getThresholdForBenchmark retrieves the appropriate threshold for a benchmark
+func getThresholdForBenchmark(benchName string, thresholds *ThresholdsConfig) float64 {
+	// Default threshold
+	defaultThreshold := 10.0
+
+	if thresholds == nil {
+		return defaultThreshold
+	}
+
+	// Try to find specific threshold based on benchmark name
+	// This is a simplified approach - could be made more sophisticated
+	for _, threshold := range thresholds.ComponentBenchmarks.Classification {
+		if threshold.MaxRegressionPercent > 0 {
+			return threshold.MaxRegressionPercent
+		}
+	}
+
+	for _, threshold := range thresholds.ComponentBenchmarks.DecisionEngine {
+		if threshold.MaxRegressionPercent > 0 {
+			return threshold.MaxRegressionPercent
+		}
+	}
+
+	for _, threshold := range thresholds.ComponentBenchmarks.Cache {
+		if threshold.MaxRegressionPercent > 0 {
+			return threshold.MaxRegressionPercent
+		}
+	}
+
+	return defaultThreshold
+}
+
+// HasRegressions reports whether at least one entry in results has been
+// flagged as a regression. An empty or nil slice yields false.
+func HasRegressions(results []ComparisonResult) bool {
+	for i := range results {
+		if results[i].RegressionDetected {
+			return true
+		}
+	}
+	return false
+}
+
+// PrintComparisonResults prints comparison results in a formatted table
+func PrintComparisonResults(results []ComparisonResult) {
+	fmt.Println("\n" + "===================================================================================")
+	fmt.Println("                        PERFORMANCE COMPARISON RESULTS")
+	fmt.Println("===================================================================================")
+	fmt.Printf("%-50s %-15s %-15s %-15s\n", "Benchmark", "Baseline", "Current", "Change")
+	fmt.Println("-----------------------------------------------------------------------------------")
+
+	for _, result := range results {
+		icon := "✓"
+		if result.RegressionDetected {
+			icon = "⚠️"
+		}
+
+		// Display ns/op comparison
+		fmt.Printf("%s %-48s %-15d %-15d %+.2f%%\n",
+			icon,
+			result.BenchmarkName,
+			result.Baseline.NsPerOp,
+			result.Current.NsPerOp,
+			result.NsPerOpChange,
+		)
+
+		// Display P95 latency if available
+		if result.Baseline.P95LatencyMs > 0 {
+			fmt.Printf("  └─ P95 Latency: %-15.2fms %-15.2fms %+.2f%%\n",
+				result.Baseline.P95LatencyMs,
+				result.Current.P95LatencyMs,
+				result.P95LatencyChange,
+			)
+		}
+
+		// Display throughput if available
+		if result.Baseline.ThroughputQPS > 0 {
+			fmt.Printf("  └─ Throughput:  %-15.2f qps %-15.2f qps %+.2f%%\n",
+				result.Baseline.ThroughputQPS,
+				result.Current.ThroughputQPS,
+				result.ThroughputChange,
+			)
+		}
+	}
+
+	fmt.Println("===================================================================================")
+
+	// Print summary
+	regressionCount := 0
+	for _, result := range results {
+		if result.RegressionDetected {
+			regressionCount++
+		}
+	}
+
+	if regressionCount > 0 {
+		fmt.Printf("\n⚠️  WARNING: %d regression(s) detected!\n", regressionCount)
+	} else {
+		fmt.Printf("\n✓ No regressions detected\n")
+	}
+}
diff --git a/perf/pkg/benchmark/config.go b/perf/pkg/benchmark/config.go
new file mode 100644
index 000000000..0689b061f
--- /dev/null
+++ b/perf/pkg/benchmark/config.go
@@ -0,0 +1,151 @@
+package benchmark
+
+import (
+	"fmt"
+	"os"
+
+	"gopkg.in/yaml.v3"
+)
+
+// Config is the root of the performance-testing configuration file decoded
+// by LoadConfig. It groups benchmark parameters, profiling settings and
+// reporting settings under the YAML keys named in the field tags.
+type Config struct {
+	BenchmarkConfig BenchmarkConfigSection `yaml:"benchmark_config"`
+	Profiling       ProfilingConfig        `yaml:"profiling"`
+	Reporting       ReportingConfig        `yaml:"reporting"`
+}
+
+// BenchmarkConfigSection holds the per-suite benchmark parameters found
+// under the "benchmark_config" key: one sub-section each for the
+// classification, cache and end-to-end benchmarks.
+type BenchmarkConfigSection struct {
+	Classification ClassificationConfig `yaml:"classification"`
+	Cache          CacheConfig          `yaml:"cache"`
+	E2E            E2EConfig            `yaml:"e2e"`
+}
+
+// ClassificationConfig holds the parameters of the classification
+// benchmarks: the batch sizes to exercise and the iteration counts.
+// NOTE(review): presumably WarmupIterations are run before measurement and
+// excluded from results; confirm against the benchmark runner.
+type ClassificationConfig struct {
+	BatchSizes       []int `yaml:"batch_sizes"`
+	Iterations       int   `yaml:"iterations"`
+	WarmupIterations int   `yaml:"warmup_iterations"`
+}
+
+// CacheConfig holds the parameters of the cache benchmarks: the cache sizes
+// and concurrency levels to exercise, and the hit ratio to simulate.
+// NOTE(review): HitRatio is presumably a fraction in [0, 1]; the unit is not
+// enforced here, so confirm against the config file and its consumer.
+type CacheConfig struct {
+	CacheSizes        []int   `yaml:"cache_sizes"`
+	ConcurrencyLevels []int   `yaml:"concurrency_levels"`
+	HitRatio          float64 `yaml:"hit_ratio"`
+}
+
+// E2EConfig holds the parameters of the end-to-end benchmarks, which are
+// described as a list of load patterns.
+type E2EConfig struct {
+	LoadPatterns []LoadPattern `yaml:"load_patterns"`
+}
+
+// LoadPattern describes one named load-testing pattern.
+//
+// QPS, StartQPS and EndQPS are all optional in the YAML.
+// NOTE(review): presumably QPS describes a constant load and
+// StartQPS/EndQPS a ramp; confirm against the load generator.
+// Duration is kept as a string and is not parsed or validated here.
+type LoadPattern struct {
+	Name     string `yaml:"name"`
+	QPS      int    `yaml:"qps,omitempty"`
+	StartQPS int    `yaml:"start_qps,omitempty"`
+	EndQPS   int    `yaml:"end_qps,omitempty"`
+	Duration string `yaml:"duration"`
+}
+
+// ProfilingConfig selects which profiles (CPU, memory, goroutine) are
+// enabled and where their output goes. LoadConfig sets OutputDir to
+// "reports" when the file leaves it empty.
+type ProfilingConfig struct {
+	EnableCPU       bool   `yaml:"enable_cpu"`
+	EnableMemory    bool   `yaml:"enable_memory"`
+	EnableGoroutine bool   `yaml:"enable_goroutine"`
+	OutputDir       string `yaml:"output_dir"`
+}
+
+// ReportingConfig lists the report formats to produce and the directory
+// that holds baseline files. LoadConfig sets BaselineDir to
+// "testdata/baselines" when the file leaves it empty.
+type ReportingConfig struct {
+	Formats     []string `yaml:"formats"`
+	BaselineDir string   `yaml:"baseline_dir"`
+}
+
+// LoadConfig loads configuration from a YAML file
+func LoadConfig(path string) (*Config, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read config file: %w", err)
+	}
+
+	var config Config
+	if err := yaml.Unmarshal(data, &config); err != nil {
+		return nil, fmt.Errorf("failed to parse config: %w", err)
+	}
+
+	// Set defaults
+	if config.Profiling.OutputDir == "" {
+		config.Profiling.OutputDir = "reports"
+	}
+
+	if config.Reporting.BaselineDir == "" {
+		config.Reporting.BaselineDir = "testdata/baselines"
+	}
+
+	return &config, nil
+}
+
+// ThresholdsConfig is the root of the thresholds file decoded by
+// LoadThresholds. It groups limits for component benchmarks, end-to-end
+// tests and resource usage under the YAML keys named in the field tags.
+type ThresholdsConfig struct {
+	ComponentBenchmarks ComponentBenchmarksThresholds `yaml:"component_benchmarks"`
+	E2ETests            E2ETestsThresholds            `yaml:"e2e_tests"`
+	ResourceLimits      ResourceLimitsThresholds      `yaml:"resource_limits"`
+}
+
+// ComponentBenchmarksThresholds holds the thresholds for component
+// benchmarks, one map per component group (classification, decision engine,
+// cache). Each map is keyed by a name taken from the YAML file.
+// NOTE(review): whether those keys are full benchmark names or shorter
+// aliases is not visible here; confirm against the thresholds file.
+type ComponentBenchmarksThresholds struct {
+	Classification map[string]BenchmarkThreshold `yaml:"classification"`
+	DecisionEngine map[string]BenchmarkThreshold `yaml:"decision_engine"`
+	Cache          map[string]BenchmarkThreshold `yaml:"cache"`
+}
+
+// E2ETestsThresholds holds the limits applied to end-to-end tests, split
+// into throughput and latency requirements.
+type E2ETestsThresholds struct {
+	Throughput ThroughputThreshold `yaml:"throughput"`
+	Latency    LatencyThreshold    `yaml:"latency"`
+}
+
+// ResourceLimitsThresholds holds upper bounds on resource usage: memory in
+// megabytes, number of goroutines, and CPU usage in percent.
+type ResourceLimitsThresholds struct {
+	MaxMemoryMB   int     `yaml:"max_memory_mb"`
+	MaxGoroutines int     `yaml:"max_goroutines"`
+	MaxCPUPercent float64 `yaml:"max_cpu_percent"`
+}
+
+// BenchmarkThreshold holds the limits configured for one component
+// benchmark entry. All fields except MaxRegressionPercent are optional in
+// the YAML.
+type BenchmarkThreshold struct {
+	MaxP95LatencyMs      float64 `yaml:"max_p95_latency_ms,omitempty"`
+	MaxP99LatencyMs      float64 `yaml:"max_p99_latency_ms,omitempty"`
+	MinThroughputQPS     float64 `yaml:"min_throughput_qps,omitempty"`
+	MinCacheHitRate      float64 `yaml:"min_cache_hit_rate,omitempty"`
+	// MaxRegressionPercent is the value getThresholdForBenchmark reads; it
+	// is only honoured there when greater than zero.
+	MaxRegressionPercent float64 `yaml:"max_regression_percent"`
+}
+
+// ThroughputThreshold holds the minimum sustained queries per second and the
+// minimum success rate required of end-to-end tests.
+// NOTE(review): the unit of MinSuccessRate (fraction vs. percent) is not
+// visible here; confirm against the thresholds file.
+type ThroughputThreshold struct {
+	MinSustainedQPS float64 `yaml:"min_sustained_qps"`
+	MinSuccessRate  float64 `yaml:"min_success_rate"`
+}
+
+// LatencyThreshold holds the maximum P95 and P99 latencies, in
+// milliseconds, allowed for end-to-end tests.
+type LatencyThreshold struct {
+	MaxP95Ms float64 `yaml:"max_p95_ms"`
+	MaxP99Ms float64 `yaml:"max_p99_ms"`
+}
+
+// LoadThresholds loads threshold configuration from a YAML file
+func LoadThresholds(path string) (*ThresholdsConfig, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read thresholds file: %w", err)
+	}
+
+	var thresholds ThresholdsConfig
+	if err := yaml.Unmarshal(data, &thresholds); err != nil {
+		return nil, fmt.Errorf("failed to parse thresholds: %w", err)
+	}
+
+	return &thresholds, nil
+}
diff --git a/perf/pkg/benchmark/report.go b/perf/pkg/benchmark/report.go
new file mode 100644
index 000000000..b7f41fc48
--- /dev/null
+++ b/perf/pkg/benchmark/report.go
@@ -0,0 +1,246 @@
+package benchmark
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+)
+
+// Report is the top-level performance report. It is serialised to JSON with
+// the keys given in the field tags and is also the receiver for the
+// Markdown and HTML renderers in this file.
+type Report struct {
+	Metadata       ReportMetadata     `json:"metadata"`
+	Comparisons    []ComparisonResult `json:"comparisons"`
+	HasRegressions bool               `json:"has_regressions"`
+	Summary        ReportSummary      `json:"summary"`
+}
+
+// ReportMetadata holds the provenance of a report: when it was generated and
+// the git commit, git branch and Go version it was produced with. The values
+// are supplied by the caller of GenerateReport; nothing here derives them.
+type ReportMetadata struct {
+	GeneratedAt time.Time `json:"generated_at"`
+	GitCommit   string    `json:"git_commit"`
+	GitBranch   string    `json:"git_branch"`
+	GoVersion   string    `json:"go_version"`
+}
+
+// ReportSummary holds the per-report counters filled in by GenerateReport.
+// Each comparison is counted in TotalBenchmarks and in exactly one of the
+// three other counters.
+type ReportSummary struct {
+	TotalBenchmarks   int `json:"total_benchmarks"`
+	RegressionsFound  int `json:"regressions_found"`
+	ImprovementsFound int `json:"improvements_found"`
+	NoChangeFound     int `json:"no_change_found"`
+}
+
+// GenerateReport creates a performance report from comparison results
+func GenerateReport(comparisons []ComparisonResult, metadata ReportMetadata) *Report {
+	report := &Report{
+		Metadata:       metadata,
+		Comparisons:    comparisons,
+		HasRegressions: HasRegressions(comparisons),
+	}
+
+	// Calculate summary
+	for _, comp := range comparisons {
+		report.Summary.TotalBenchmarks++
+		if comp.RegressionDetected {
+			report.Summary.RegressionsFound++
+		} else if comp.NsPerOpChange < -5 { // 5% improvement threshold
+			report.Summary.ImprovementsFound++
+		} else {
+			report.Summary.NoChangeFound++
+		}
+	}
+
+	return report
+}
+
+// SaveJSON writes the report to path as two-space-indented JSON, creating
+// the parent directory first if it does not exist.
+//
+// A wrapped error is returned if the directory cannot be created, the report
+// cannot be marshalled, or the file cannot be written.
+func (r *Report) SaveJSON(path string) error {
+	parent := filepath.Dir(path)
+	if mkErr := os.MkdirAll(parent, 0755); mkErr != nil {
+		return fmt.Errorf("failed to create report directory: %w", mkErr)
+	}
+
+	encoded, encErr := json.MarshalIndent(r, "", "  ")
+	if encErr != nil {
+		return fmt.Errorf("failed to marshal report: %w", encErr)
+	}
+
+	writeErr := os.WriteFile(path, encoded, 0644)
+	if writeErr != nil {
+		return fmt.Errorf("failed to write report file: %w", writeErr)
+	}
+
+	fmt.Printf("JSON report saved: %s\n", path)
+	return nil
+}
+
+// SaveMarkdown renders the report as Markdown and writes it to path, creating
+// the parent directory first if it does not exist.
+//
+// The document contains the metadata header, the summary counters, a
+// regression banner and a results table. Each comparison contributes an
+// ns/op row; P95-latency and throughput rows are added only when the
+// corresponding baseline value is greater than zero. Pipe characters in
+// benchmark names are escaped so a name cannot split a table cell.
+//
+// A wrapped error is returned if the directory cannot be created or the file
+// cannot be written.
+func (r *Report) SaveMarkdown(path string) error {
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return fmt.Errorf("failed to create report directory: %w", err)
+	}
+
+	var md strings.Builder
+
+	// Header
+	md.WriteString("# Performance Benchmark Report\n\n")
+	fmt.Fprintf(&md, "**Generated:** %s\n\n", r.Metadata.GeneratedAt.Format(time.RFC3339))
+	fmt.Fprintf(&md, "**Git Commit:** %s\n\n", r.Metadata.GitCommit)
+	fmt.Fprintf(&md, "**Git Branch:** %s\n\n", r.Metadata.GitBranch)
+	fmt.Fprintf(&md, "**Go Version:** %s\n\n", r.Metadata.GoVersion)
+
+	// Summary
+	md.WriteString("## Summary\n\n")
+	fmt.Fprintf(&md, "- **Total Benchmarks:** %d\n", r.Summary.TotalBenchmarks)
+	fmt.Fprintf(&md, "- **Regressions:** %d\n", r.Summary.RegressionsFound)
+	fmt.Fprintf(&md, "- **Improvements:** %d\n", r.Summary.ImprovementsFound)
+	fmt.Fprintf(&md, "- **No Change:** %d\n\n", r.Summary.NoChangeFound)
+
+	if r.HasRegressions {
+		md.WriteString("⚠️ **WARNING: Performance regressions detected!**\n\n")
+	} else {
+		md.WriteString("✅ **No regressions detected**\n\n")
+	}
+
+	// Detailed results
+	md.WriteString("## Detailed Results\n\n")
+	md.WriteString("| Benchmark | Metric | Baseline | Current | Change | Status |\n")
+	md.WriteString("|-----------|--------|----------|---------|--------|--------|\n")
+
+	for _, comp := range r.Comparisons {
+		status := "✅ OK"
+		if comp.RegressionDetected {
+			status = "⚠️ REGRESSION"
+		} else if comp.NsPerOpChange < -5 {
+			status = "🚀 IMPROVED"
+		}
+
+		// A literal "|" in the name would be read as a cell separator and
+		// shift every following column, so escape it.
+		name := strings.ReplaceAll(comp.BenchmarkName, "|", `\|`)
+
+		// ns/op row
+		fmt.Fprintf(&md, "| %s | ns/op | %d | %d | %+.2f%% | %s |\n",
+			name,
+			comp.Baseline.NsPerOp,
+			comp.Current.NsPerOp,
+			comp.NsPerOpChange,
+			status,
+		)
+
+		// P95 latency row if available; the benchmark cell is left empty so
+		// the row reads as a continuation of the ns/op row above it.
+		if comp.Baseline.P95LatencyMs > 0 {
+			fmt.Fprintf(&md, "| %s | P95 Latency | %.2fms | %.2fms | %+.2f%% | |\n",
+				"",
+				comp.Baseline.P95LatencyMs,
+				comp.Current.P95LatencyMs,
+				comp.P95LatencyChange,
+			)
+		}
+
+		// Throughput row if available
+		if comp.Baseline.ThroughputQPS > 0 {
+			fmt.Fprintf(&md, "| %s | Throughput | %.2f qps | %.2f qps | %+.2f%% | |\n",
+				"",
+				comp.Baseline.ThroughputQPS,
+				comp.Current.ThroughputQPS,
+				comp.ThroughputChange,
+			)
+		}
+	}
+
+	if err := os.WriteFile(path, []byte(md.String()), 0644); err != nil {
+		return fmt.Errorf("failed to write markdown report: %w", err)
+	}
+
+	fmt.Printf("Markdown report saved: %s\n", path)
+	return nil
+}
+
+// SaveHTML renders the report as a standalone HTML page and writes it to
+// path, creating the parent directory first if it does not exist.
+//
+// The page contains the metadata block, four summary cards and a results
+// table with one ns/op row per comparison. Git metadata, the Go version and
+// benchmark names are HTML-escaped before being inserted, so values such as
+// a branch name containing "<" or "&" cannot inject markup.
+//
+// A wrapped error is returned if the directory cannot be created or the file
+// cannot be written.
+func (r *Report) SaveHTML(path string) error {
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return fmt.Errorf("failed to create report directory: %w", err)
+	}
+
+	// Applies the same five replacements as the standard html.EscapeString.
+	// The replacer scans the input once, so the "&" it emits is never
+	// escaped a second time.
+	esc := strings.NewReplacer(
+		"&", "&amp;",
+		"<", "&lt;",
+		">", "&gt;",
+		`"`, "&#34;",
+		"'", "&#39;",
+	).Replace
+
+	var page strings.Builder
+
+	page.WriteString(`<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Performance Benchmark Report</title>
+    <style>
+        body { font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }
+        .container { max-width: 1200px; margin: 0 auto; background-color: white; padding: 20px; border-radius: 8px; }
+        h1 { color: #333; }
+        .metadata { background-color: #f0f0f0; padding: 15px; border-radius: 5px; margin-bottom: 20px; }
+        .summary { display: grid; grid-template-columns: repeat(4, 1fr); gap: 15px; margin-bottom: 20px; }
+        .summary-card { background-color: #e8f4f8; padding: 15px; border-radius: 5px; text-align: center; }
+        .summary-card.regression { background-color: #ffe8e8; }
+        .summary-card.improvement { background-color: #e8ffe8; }
+        table { width: 100%; border-collapse: collapse; margin-top: 20px; }
+        th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }
+        th { background-color: #4CAF50; color: white; }
+        tr:hover { background-color: #f5f5f5; }
+        .regression { color: #d32f2f; font-weight: bold; }
+        .improvement { color: #388e3c; font-weight: bold; }
+        .ok { color: #666; }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Performance Benchmark Report</h1>
+`)
+
+	// Metadata
+	page.WriteString(`        <div class="metadata">`)
+	fmt.Fprintf(&page, `            <p><strong>Generated:</strong> %s</p>`, r.Metadata.GeneratedAt.Format(time.RFC3339))
+	fmt.Fprintf(&page, `            <p><strong>Git Commit:</strong> %s</p>`, esc(r.Metadata.GitCommit))
+	fmt.Fprintf(&page, `            <p><strong>Git Branch:</strong> %s</p>`, esc(r.Metadata.GitBranch))
+	fmt.Fprintf(&page, `            <p><strong>Go Version:</strong> %s</p>`, esc(r.Metadata.GoVersion))
+	page.WriteString(`        </div>`)
+
+	// Summary
+	page.WriteString(`        <div class="summary">`)
+	fmt.Fprintf(&page, `            <div class="summary-card"><h3>%d</h3><p>Total Benchmarks</p></div>`, r.Summary.TotalBenchmarks)
+	fmt.Fprintf(&page, `            <div class="summary-card regression"><h3>%d</h3><p>Regressions</p></div>`, r.Summary.RegressionsFound)
+	fmt.Fprintf(&page, `            <div class="summary-card improvement"><h3>%d</h3><p>Improvements</p></div>`, r.Summary.ImprovementsFound)
+	fmt.Fprintf(&page, `            <div class="summary-card"><h3>%d</h3><p>No Change</p></div>`, r.Summary.NoChangeFound)
+	page.WriteString(`        </div>`)
+
+	// Results table
+	page.WriteString(`        <table>`)
+	page.WriteString(`            <tr><th>Benchmark</th><th>Metric</th><th>Baseline</th><th>Current</th><th>Change</th><th>Status</th></tr>`)
+
+	for _, comp := range r.Comparisons {
+		// statusClass and statusText are fixed literals, so only the
+		// benchmark name needs escaping in the row below.
+		statusClass := "ok"
+		statusText := "OK"
+		if comp.RegressionDetected {
+			statusClass = "regression"
+			statusText = "REGRESSION"
+		} else if comp.NsPerOpChange < -5 {
+			statusClass = "improvement"
+			statusText = "IMPROVED"
+		}
+
+		fmt.Fprintf(&page, `            <tr><td>%s</td><td>ns/op</td><td>%d</td><td>%d</td><td>%+.2f%%</td><td class="%s">%s</td></tr>`,
+			esc(comp.BenchmarkName),
+			comp.Baseline.NsPerOp,
+			comp.Current.NsPerOp,
+			comp.NsPerOpChange,
+			statusClass,
+			statusText,
+		)
+	}
+
+	page.WriteString(`        </table>`)
+	page.WriteString(`    </div>`)
+	page.WriteString(`</body>`)
+	page.WriteString(`</html>`)
+
+	if err := os.WriteFile(path, []byte(page.String()), 0644); err != nil {
+		return fmt.Errorf("failed to write HTML report: %w", err)
+	}
+
+	fmt.Printf("HTML report saved: %s\n", path)
+	return nil
+}
diff --git a/perf/pkg/benchmark/runner.go b/perf/pkg/benchmark/runner.go
new file mode 100644
index 000000000..3c50619b9
--- /dev/null
+++ b/perf/pkg/benchmark/runner.go
@@ -0,0 +1,154 @@
+package benchmark
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"runtime"
+	"time"
+)
+
+// Runner orchestrates benchmark execution and profiling. It bundles the
+// loaded configuration with the profiler and metrics collector that
+// RunBenchmarks drives. Instances are built by NewRunner.
+type Runner struct {
+	config    *Config
+	profiler  *Profiler
+	collector *MetricsCollector
+}
+
+// NewRunner builds a Runner from the configuration file at configPath.
+//
+// The profiler is pointed at the configured profiling output directory and a
+// fresh metrics collector is attached. A wrapped error is returned when the
+// configuration cannot be loaded.
+func NewRunner(configPath string) (*Runner, error) {
+	cfg, loadErr := LoadConfig(configPath)
+	if loadErr != nil {
+		return nil, fmt.Errorf("failed to load config: %w", loadErr)
+	}
+
+	runner := &Runner{config: cfg}
+	runner.profiler = NewProfiler(cfg.Profiling.OutputDir)
+	runner.collector = NewMetricsCollector()
+	return runner, nil
+}
+
+// RunBenchmarks drives one benchmark run over the named suites.
+//
+// It prints the environment banner, optionally starts CPU profiling (stopped
+// again when the function returns), records runtime metrics before and after
+// the suite loop, and optionally takes memory and goroutine snapshots.
+// Snapshot failures are reported on stderr and do not abort the run.
+//
+// The suite loop only announces each suite; per the inline note, execution
+// itself is left to Go's testing framework, so the Suites map in the result
+// is returned empty.
+//
+// It returns (nil, ctx.Err()) if ctx is cancelled before a suite is
+// announced, and a wrapped error if CPU profiling cannot be started.
+func (r *Runner) RunBenchmarks(ctx context.Context, suites []string) (*BenchmarkResults, error) {
+	fmt.Printf("Starting benchmark run at %s\n", time.Now().Format(time.RFC3339))
+	fmt.Printf("Go version: %s\n", runtime.Version())
+	fmt.Printf("GOOS: %s, GOARCH: %s\n", runtime.GOOS, runtime.GOARCH)
+	fmt.Printf("CPU cores: %d\n\n", runtime.NumCPU())
+
+	out := new(BenchmarkResults)
+	out.StartTime = time.Now()
+	out.Suites = make(map[string]*SuiteResult)
+
+	// Start profiling if enabled
+	if r.config.Profiling.EnableCPU {
+		startErr := r.profiler.StartCPU()
+		if startErr != nil {
+			return nil, fmt.Errorf("failed to start CPU profiling: %w", startErr)
+		}
+		defer r.profiler.StopCPU()
+	}
+
+	// Metrics snapshot taken before any suite is announced
+	out.BaselineMetrics = r.collector.Collect()
+
+	for idx := range suites {
+		// Non-blocking cancellation check before each suite
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		default:
+		}
+
+		// Suite execution will be handled by Go's testing framework;
+		// this runner orchestrates the overall process.
+		fmt.Printf("Running benchmark suite: %s\n", suites[idx])
+	}
+
+	// Memory snapshot is best-effort: a failure is only warned about
+	if r.config.Profiling.EnableMemory {
+		memErr := r.profiler.TakeMemSnapshot()
+		if memErr != nil {
+			fmt.Fprintf(os.Stderr, "Warning: failed to take memory snapshot: %v\n", memErr)
+		}
+	}
+
+	// Goroutine snapshot is best-effort as well
+	if r.config.Profiling.EnableGoroutine {
+		grErr := r.profiler.TakeGoroutineSnapshot()
+		if grErr != nil {
+			fmt.Fprintf(os.Stderr, "Warning: failed to take goroutine snapshot: %v\n", grErr)
+		}
+	}
+
+	// Metrics snapshot taken after the loop and the optional snapshots
+	out.FinalMetrics = r.collector.Collect()
+
+	out.EndTime = time.Now()
+	out.Duration = out.EndTime.Sub(out.StartTime)
+
+	return out, nil
+}
+
+// BenchmarkResults holds the outcome of one RunBenchmarks call.
+//
+// Duration is EndTime minus StartTime. BaselineMetrics and FinalMetrics are
+// the runtime snapshots taken before and after the suite loop. Suites is
+// created empty by RunBenchmarks, which does not add entries to it.
+type BenchmarkResults struct {
+	StartTime       time.Time
+	EndTime         time.Time
+	Duration        time.Duration
+	Suites          map[string]*SuiteResult
+	BaselineMetrics *RuntimeMetrics
+	FinalMetrics    *RuntimeMetrics
+}
+
+// SuiteResult holds results for a single benchmark suite.
+//
+// NOTE(review): nothing in this file creates or fills a SuiteResult; the
+// relationship TestCount == Passed + Failed is presumed from the names only.
+type SuiteResult struct {
+	Name      string
+	Duration  time.Duration
+	TestCount int
+	Passed    int
+	Failed    int
+}
+
+// Profiler handles pprof profiling for the benchmark runner. It records the
+// directory profiles are written to and the file of an in-progress CPU
+// profile.
+//
+// NOTE(review): RunBenchmarks calls StartCPU, StopCPU, TakeMemSnapshot and
+// TakeGoroutineSnapshot on this type, but no methods are declared for it in
+// this file — confirm they are defined elsewhere in package benchmark.
+type Profiler struct {
+	outputDir string
+	cpuFile   *os.File
+}
+
+// NewProfiler returns a Profiler that uses outputDir as its output
+// directory. No CPU profile file is open on the returned value.
+func NewProfiler(outputDir string) *Profiler {
+	prof := new(Profiler)
+	prof.outputDir = outputDir
+	return prof
+}
+
+// MetricsCollector collects runtime metrics. It has no fields; each Collect
+// call reads the values afresh from the Go runtime.
+type MetricsCollector struct{}
+
+// NewMetricsCollector returns a ready-to-use, field-less metrics collector.
+func NewMetricsCollector() *MetricsCollector {
+	return new(MetricsCollector)
+}
+
+// RuntimeMetrics is one point-in-time snapshot produced by
+// MetricsCollector.Collect: the capture time, runtime.NumCPU,
+// runtime.NumGoroutine and a full copy of runtime.MemStats.
+type RuntimeMetrics struct {
+	Timestamp      time.Time
+	CPUCount       int
+	GoroutineCount int
+	MemStats       runtime.MemStats
+}
+
+// Collect gathers current runtime metrics
+func (mc *MetricsCollector) Collect() *RuntimeMetrics {
+	var memStats runtime.MemStats
+	runtime.ReadMemStats(&memStats)
+
+	return &RuntimeMetrics{
+		Timestamp:      time.Now(),
+		CPUCount:       runtime.NumCPU(),
+		GoroutineCount: runtime.NumGoroutine(),
+		MemStats:       memStats,
+	}
+}
diff --git a/perf/pkg/profiler/profiler.go b/perf/pkg/profiler/profiler.go
new file mode 100644
index 000000000..0ae15b1c8
--- /dev/null
+++ b/perf/pkg/profiler/profiler.go
@@ -0,0 +1,150 @@
+package profiler
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+	"runtime/pprof"
+	"time"
+)
+
+// Profiler manages pprof profiling operations. Profiles are written as
+// timestamped files under outputDir. cpuFile is non-nil only while a CPU
+// profile started by StartCPU is in progress.
+type Profiler struct {
+	outputDir string
+	cpuFile   *os.File
+}
+
+// New returns a Profiler that writes its profile files under outputDir.
+// The directory is not created here; each profiling method creates it.
+func New(outputDir string) *Profiler {
+	prof := new(Profiler)
+	prof.outputDir = outputDir
+	return prof
+}
+
+// StartCPU begins CPU profiling into a new cpu-<timestamp>.prof file under
+// the output directory, creating the directory if needed. Call StopCPU to
+// finish the profile and close the file.
+//
+// An error is returned if this Profiler already has a CPU profile running,
+// if the directory or file cannot be created, or if the runtime refuses to
+// start profiling. On failure no profile file is left behind.
+func (p *Profiler) StartCPU() error {
+	// Refuse a second start before touching the filesystem; otherwise a
+	// stray empty file would be created only for pprof to reject it.
+	if p.cpuFile != nil {
+		return fmt.Errorf("failed to start CPU profiling: already running, writing to %s", p.cpuFile.Name())
+	}
+
+	if err := os.MkdirAll(p.outputDir, 0755); err != nil {
+		return fmt.Errorf("failed to create output directory: %w", err)
+	}
+
+	filename := filepath.Join(p.outputDir, fmt.Sprintf("cpu-%s.prof", time.Now().Format("20060102-150405")))
+	f, err := os.Create(filename)
+	if err != nil {
+		return fmt.Errorf("failed to create CPU profile file: %w", err)
+	}
+
+	if err := pprof.StartCPUProfile(f); err != nil {
+		// Best-effort cleanup of the empty file; the start error is the
+		// one worth reporting.
+		f.Close()
+		os.Remove(filename)
+		return fmt.Errorf("failed to start CPU profiling: %w", err)
+	}
+
+	p.cpuFile = f
+	fmt.Printf("CPU profiling started: %s\n", filename)
+	return nil
+}
+
+// StopCPU stops CPU profiling and closes the profile file. It is a no-op
+// that returns nil when no profile started by StartCPU is in progress.
+//
+// The profiler is always left in the "not profiling" state, even when
+// closing the file fails; in that case a wrapped error is returned.
+func (p *Profiler) StopCPU() error {
+	if p.cpuFile == nil {
+		return nil
+	}
+
+	// Detach the handle first so a failed Close cannot leave the profiler
+	// holding a closed file that a later call would try to close again.
+	f := p.cpuFile
+	p.cpuFile = nil
+
+	pprof.StopCPUProfile()
+	if err := f.Close(); err != nil {
+		return fmt.Errorf("failed to close CPU profile file: %w", err)
+	}
+
+	fmt.Printf("CPU profiling stopped: %s\n", f.Name())
+	return nil
+}
+
+// TakeMemSnapshot writes a heap profile to a new mem-<timestamp>.prof file
+// under the output directory, creating the directory if needed.
+//
+// A garbage collection is forced first so the profile reflects up-to-date
+// statistics. A wrapped error is returned if the directory or file cannot be
+// created, or if the profile cannot be written or the file cannot be closed.
+func (p *Profiler) TakeMemSnapshot() error {
+	if err := os.MkdirAll(p.outputDir, 0755); err != nil {
+		return fmt.Errorf("failed to create output directory: %w", err)
+	}
+
+	filename := filepath.Join(p.outputDir, fmt.Sprintf("mem-%s.prof", time.Now().Format("20060102-150405")))
+	f, err := os.Create(filename)
+	if err != nil {
+		return fmt.Errorf("failed to create memory profile file: %w", err)
+	}
+
+	runtime.GC() // Get up-to-date statistics
+	if err := pprof.WriteHeapProfile(f); err != nil {
+		f.Close()
+		return fmt.Errorf("failed to write heap profile: %w", err)
+	}
+
+	// Close explicitly: an error here can mean the data never reached disk,
+	// so it must not be dropped by a deferred call.
+	if err := f.Close(); err != nil {
+		return fmt.Errorf("failed to close memory profile file: %w", err)
+	}
+
+	fmt.Printf("Memory snapshot saved: %s\n", filename)
+	return nil
+}
+
+// TakeGoroutineSnapshot writes the "goroutine" pprof profile to a new
+// goroutine-<timestamp>.prof file under the output directory, creating the
+// directory if needed.
+//
+// A wrapped error is returned if the directory or file cannot be created, or
+// if the profile cannot be written or the file cannot be closed.
+func (p *Profiler) TakeGoroutineSnapshot() error {
+	if err := os.MkdirAll(p.outputDir, 0755); err != nil {
+		return fmt.Errorf("failed to create output directory: %w", err)
+	}
+
+	filename := filepath.Join(p.outputDir, fmt.Sprintf("goroutine-%s.prof", time.Now().Format("20060102-150405")))
+	f, err := os.Create(filename)
+	if err != nil {
+		return fmt.Errorf("failed to create goroutine profile file: %w", err)
+	}
+
+	if err := pprof.Lookup("goroutine").WriteTo(f, 0); err != nil {
+		f.Close()
+		return fmt.Errorf("failed to write goroutine profile: %w", err)
+	}
+
+	// Close explicitly: an error here can mean the data never reached disk,
+	// so it must not be dropped by a deferred call.
+	if err := f.Close(); err != nil {
+		return fmt.Errorf("failed to close goroutine profile file: %w", err)
+	}
+
+	fmt.Printf("Goroutine snapshot saved: %s\n", filename)
+	return nil
+}
+
+// TakeBlockSnapshot writes the "block" pprof profile to a new
+// block-<timestamp>.prof file under the output directory, creating the
+// directory if needed.
+//
+// NOTE(review): block profiling is switched on (rate 1) only at the start of
+// this call and is never switched off. Blocking events that happened before
+// the first call are therefore not in the profile, and the profiling
+// overhead stays enabled afterwards. Consider enabling it when the process
+// starts instead.
+//
+// A wrapped error is returned if the directory or file cannot be created, or
+// if the profile cannot be written or the file cannot be closed.
+func (p *Profiler) TakeBlockSnapshot() error {
+	runtime.SetBlockProfileRate(1) // Enable block profiling
+
+	if err := os.MkdirAll(p.outputDir, 0755); err != nil {
+		return fmt.Errorf("failed to create output directory: %w", err)
+	}
+
+	filename := filepath.Join(p.outputDir, fmt.Sprintf("block-%s.prof", time.Now().Format("20060102-150405")))
+	f, err := os.Create(filename)
+	if err != nil {
+		return fmt.Errorf("failed to create block profile file: %w", err)
+	}
+
+	if err := pprof.Lookup("block").WriteTo(f, 0); err != nil {
+		f.Close()
+		return fmt.Errorf("failed to write block profile: %w", err)
+	}
+
+	// Close explicitly: an error here can mean the data never reached disk,
+	// so it must not be dropped by a deferred call.
+	if err := f.Close(); err != nil {
+		return fmt.Errorf("failed to close block profile file: %w", err)
+	}
+
+	fmt.Printf("Block snapshot saved: %s\n", filename)
+	return nil
+}
+
+// TakeMutexSnapshot writes the "mutex" pprof profile to a new
+// mutex-<timestamp>.prof file under the output directory, creating the
+// directory if needed.
+//
+// NOTE(review): mutex profiling is switched on (fraction 1) only at the
+// start of this call and is never switched off. Contention that happened
+// before the first call is therefore not in the profile, and the profiling
+// overhead stays enabled afterwards. Consider enabling it when the process
+// starts instead.
+//
+// A wrapped error is returned if the directory or file cannot be created, or
+// if the profile cannot be written or the file cannot be closed.
+func (p *Profiler) TakeMutexSnapshot() error {
+	runtime.SetMutexProfileFraction(1) // Enable mutex profiling
+
+	if err := os.MkdirAll(p.outputDir, 0755); err != nil {
+		return fmt.Errorf("failed to create output directory: %w", err)
+	}
+
+	filename := filepath.Join(p.outputDir, fmt.Sprintf("mutex-%s.prof", time.Now().Format("20060102-150405")))
+	f, err := os.Create(filename)
+	if err != nil {
+		return fmt.Errorf("failed to create mutex profile file: %w", err)
+	}
+
+	if err := pprof.Lookup("mutex").WriteTo(f, 0); err != nil {
+		f.Close()
+		return fmt.Errorf("failed to write mutex profile: %w", err)
+	}
+
+	// Close explicitly: an error here can mean the data never reached disk,
+	// so it must not be dropped by a deferred call.
+	if err := f.Close(); err != nil {
+		return fmt.Errorf("failed to close mutex profile file: %w", err)
+	}
+
+	fmt.Printf("Mutex snapshot saved: %s\n", filename)
+	return nil
+}
diff --git a/perf/scripts/update-baseline.sh b/perf/scripts/update-baseline.sh
new file mode 100755
index 000000000..4fa28d743
--- /dev/null
+++ b/perf/scripts/update-baseline.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Update performance baselines from benchmark results
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PERF_DIR="$(dirname "$SCRIPT_DIR")"
+BASELINE_DIR="$PERF_DIR/testdata/baselines"
+
+echo "Updating performance baselines..."
+echo "Baseline directory: $BASELINE_DIR"
+
+# Create baseline directory if it doesn't exist
+mkdir -p "$BASELINE_DIR"
+
+# Get git commit info
+GIT_COMMIT=$(git rev-parse HEAD 2>/dev/null || echo "unknown")
+GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown")
+TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+# TODO: Parse benchmark results and create baseline JSON files
+# For now, create placeholder baseline files
+
+echo "Creating baseline files..."
+
+# Classification baseline
+cat > "$BASELINE_DIR/classification.json" <<EOF
+{
+  "version": "v1.0.0",
+  "git_commit": "$GIT_COMMIT",
+  "timestamp": "$TIMESTAMP",
+  "benchmarks": {
+    "BenchmarkClassifyBatch_Size1": {
+      "ns_per_op": 0,
+      "p95_latency_ms": 0,
+      "throughput_qps": 0
+    }
+  }
+}
+EOF
+
+# Decision baseline
+cat > "$BASELINE_DIR/decision.json" <<EOF
+{
+  "version": "v1.0.0",
+  "git_commit": "$GIT_COMMIT",
+  "timestamp": "$TIMESTAMP",
+  "benchmarks": {
+    "BenchmarkEvaluateDecisions_SingleDomain": {
+      "ns_per_op": 0,
+      "p95_latency_ms": 0,
+      "throughput_qps": 0
+    }
+  }
+}
+EOF
+
+# Cache baseline
+cat > "$BASELINE_DIR/cache.json" <<EOF
+{
+  "version": "v1.0.0",
+  "git_commit": "$GIT_COMMIT",
+  "timestamp": "$TIMESTAMP",
+  "benchmarks": {
+    "BenchmarkCacheSearch_1000Entries": {
+      "ns_per_op": 0,
+      "p95_latency_ms": 0,
+      "throughput_qps": 0
+    }
+  }
+}
+EOF
+
+echo "✓ Baseline files updated successfully"
+echo "  Git commit: $GIT_COMMIT"
+echo "  Timestamp: $TIMESTAMP"
diff --git a/perf/testdata/examples/README.md b/perf/testdata/examples/README.md
new file mode 100644
index 000000000..00d294c54
--- /dev/null
+++ b/perf/testdata/examples/README.md
@@ -0,0 +1,267 @@
+# Performance Testing Report Examples
+
+This directory contains example outputs showing what you'll see when running performance tests.
+
+## 📁 Files in This Directory
+
+### 1. **benchmark-output-example.txt**
+Raw benchmark output from `make perf-bench-quick`
+
+**Shows:**
+
+- ns/op (nanoseconds per operation)
+- Throughput (operations per second)
+- Memory allocations
+- P50/P90/P95/P99 latencies
+- Cache hit rates
+
+**Example line:**
+
+```
+BenchmarkClassifyBatch_Size1-8    100  10245678 ns/op  10.25 ms/op  2456 B/op  45 allocs/op
+```
+
+---
+
+### 2. **comparison-example.txt**
+Baseline comparison output from `make perf-compare`
+
+**Shows:**
+
+- Benchmark vs baseline comparison
+- Percentage changes
+- Regression detection
+- Performance improvements
+- Actionable recommendations
+
+**Example:**
+
+```
+⚠️  BenchmarkEvaluateDecisions_Complex: +12.16% (threshold: 10%)
+    - P95 latency increased by 13.04%
+    - ACTION REQUIRED: Investigate
+```
+
+---
+
+### 3. **example-report.json**
+Machine-readable JSON report
+
+**Use for:**
+
+- CI/CD automation
+- Programmatic analysis
+- Data visualization
+- Trend tracking
+
+**Structure:**
+
+```json
+{
+  "metadata": {...},
+  "comparisons": [...],
+  "has_regressions": true,
+  "summary": {...}
+}
+```
+
+---
+
+### 4. **example-report.md**
+Human-readable Markdown report
+
+**Use for:**
+
+- Documentation
+- Sharing results
+- GitHub issues
+- Performance reviews
+
+**Includes:**
+
+- Executive summary
+- Detailed comparison tables
+- Analysis and recommendations
+- Trend insights
+
+---
+
+### 5. **example-report.html**
+Beautiful HTML report with styling
+
+**Features:**
+
+- Professional design
+- Color-coded metrics
+- Interactive elements (when fully implemented)
+- Visual summary cards
+- Detailed tables
+
+**Open in browser:**
+
+```bash
+open perf/testdata/examples/example-report.html  # macOS; on Linux use xdg-open
+```
+
+---
+
+### 6. **pr-comment-example.md**
+GitHub PR comment format
+
+**Shows:**
+
+- What appears on your PRs automatically
+- Summary table
+- Key changes highlighted
+- Regression warnings
+- Expandable full results
+
+**Triggered by:** CI workflow on PR
+
+---
+
+### 7. **pprof-example.txt**
+CPU profiling output and interpretation
+
+**Shows:**
+
+- Top CPU consuming functions
+- Flame graph visualization
+- Memory allocation patterns
+- Optimization opportunities
+- Hot spot analysis
+
+**View interactively:**
+
+```bash
+make perf-profile-cpu  # Opens browser at localhost:8080
+```
+
+---
+
+## 🚀 Quick Examples
+
+### Scenario 1: Everything is Good ✅
+
+```
+Summary:
+  Total Benchmarks: 32
+  Regressions: 0
+  Improvements: 5
+  No Change: 27
+
+✓ No regressions detected
+```
+
+### Scenario 2: Regression Detected ⚠️
+
+```
+⚠️  WARNING: 1 regression(s) detected!
+
+BenchmarkEvaluateDecisions_Complex: +12.16%
+  - P95 latency: 0.46ms → 0.52ms (+13.04%)
+  - Throughput: 2189 qps → 1952 qps (-10.83%)
+  - BLOCKS PR (exceeds 10% threshold)
+```
+
+### Scenario 3: Great Improvements 🚀
+
+```
+Significant Improvements:
+  1. Cache Concurrency: +4.34% throughput
+  2. Classification: -3.62% P95 latency
+  3. Request Processing: -2.43% overall
+```
+
+---
+
+## 📊 Understanding the Reports
+
+### Performance Metrics Glossary
+
+| Metric | Description | Good Value |
+|--------|-------------|------------|
+| **ns/op** | Nanoseconds per operation | Lower is better |
+| **P50** | 50th percentile latency | < threshold |
+| **P95** | 95th percentile latency | Most important metric |
+| **P99** | 99th percentile latency | Worst-case performance |
+| **QPS** | Queries per second | Higher is better |
+| **allocs/op** | Allocations per operation | Lower is better |
+| **B/op** | Bytes allocated per operation | Lower is better |
+
+### Status Indicators
+
+- ✅ **OK**: Within acceptable range
+- 🚀 **IMPROVED**: ns/op improved by more than 5% and no regression was detected
+- ⚠️ **REGRESSION**: Performance degraded beyond threshold
+- ➡️ **NO CHANGE**: Minimal difference (< 1%)
+
+### Change Interpretation
+
+| Change | Meaning |
+|--------|---------|
+| -10% ns/op | 10% faster (good) |
+| +10% ns/op | 10% slower (bad) |
+| +10% QPS | 10% more throughput (good) |
+| -10% QPS | 10% less throughput (bad) |
+
+---
+
+## 🎯 How to Use These Examples
+
+### For New Users
+
+1. Read `benchmark-output-example.txt` to understand raw output
+2. Check `comparison-example.txt` to see regression detection
+3. View `example-report.html` in browser for full experience
+
+### For CI Integration
+
+1. Reference `pr-comment-example.md` for expected PR comments
+2. Use `example-report.json` structure for automation
+3. Set up thresholds based on example values
+
+### For Performance Optimization
+
+1. Study `pprof-example.txt` for profiling insights
+2. Focus on functions > 5% CPU time
+3. Reduce allocations in hot paths
+4. Run `make perf-profile-cpu` for your code
+
+---
+
+## 🔍 Real vs Example Data
+
+**Note:** These examples use realistic but fictional data. Your actual results will vary based on:
+
+- Hardware (CPU, memory)
+- Model sizes
+- Batch sizes
+- Concurrency levels
+- Code changes
+
+**To generate real reports:**
+
+```bash
+# Run benchmarks
+make perf-bench-quick
+
+# Compare with baseline
+make perf-compare
+
+# Generate reports
+make perf-report
+```
+
+---
+
+## 📚 Learn More
+
+- [Performance Testing README](../../README.md)
+- [Quick Start Guide](../../QUICKSTART.md)
+- [Configuration Reference](../../config/thresholds.yaml)
+- [Makefile Targets](../../../tools/make/performance.mk)
+
+---
+
+*Examples created to help you understand performance testing outputs before running actual tests.*
diff --git a/perf/testdata/examples/benchmark-output-example.txt b/perf/testdata/examples/benchmark-output-example.txt
new file mode 100644
index 000000000..f1c339783
--- /dev/null
+++ b/perf/testdata/examples/benchmark-output-example.txt
@@ -0,0 +1,61 @@
+# Example Benchmark Output
+# This shows what you'll see when running: make perf-bench-quick
+
+goos: linux
+goarch: amd64
+pkg: github.com/vllm-project/semantic-router/perf/benchmarks
+cpu: Intel(R) Xeon(R) CPU @ 2.20GHz
+
+BenchmarkClassifyBatch_Size1-8              100      10245678 ns/op    10.25 ms/op     2456 B/op      45 allocs/op
+BenchmarkClassifyBatch_Size10-8              20      52345678 ns/op    52.35 ms/op    12345 B/op     234 allocs/op
+BenchmarkClassifyBatch_Size50-8               5     215678901 ns/op   215.68 ms/op    56789 B/op    1123 allocs/op
+BenchmarkClassifyBatch_Size100-8              3     412345678 ns/op   412.35 ms/op   112345 B/op    2234 allocs/op
+BenchmarkClassifyBatch_Parallel-8          1000       1234567 ns/op     1.23 ms/op     2456 B/op      45 allocs/op
+BenchmarkClassifyCategory-8                 200       8765432 ns/op     8.77 ms/op     2123 B/op      42 allocs/op
+BenchmarkClassifyPII-8                      150      10123456 ns/op    10.12 ms/op     2234 B/op      43 allocs/op
+BenchmarkClassifyJailbreak-8                180       9876543 ns/op     9.88 ms/op     2345 B/op      44 allocs/op
+BenchmarkCGOOverhead-8                      500       3456789 ns/op     3.46 ms/op     1234 B/op      23 allocs/op
+
+BenchmarkEvaluateDecisions_SingleDomain-8           10000       234567 ns/op     0.23 ms/op      456 B/op      12 allocs/op
+BenchmarkEvaluateDecisions_MultipleDomains-8         5000       345678 ns/op     0.35 ms/op      678 B/op      15 allocs/op
+BenchmarkEvaluateDecisions_WithKeywords-8            8000       267890 ns/op     0.27 ms/op      512 B/op      13 allocs/op
+BenchmarkEvaluateDecisions_ComplexScenario-8         3000       456789 ns/op     0.46 ms/op      890 B/op      18 allocs/op
+BenchmarkEvaluateDecisions_Parallel-8               20000       156789 ns/op     0.16 ms/op      456 B/op      12 allocs/op
+BenchmarkRuleEvaluation_AND-8                       12000       198765 ns/op     0.20 ms/op      489 B/op      11 allocs/op
+BenchmarkRuleEvaluation_OR-8                        15000       176543 ns/op     0.18 ms/op      467 B/op      10 allocs/op
+BenchmarkPrioritySelection-8                         6000       289012 ns/op     0.29 ms/op      623 B/op      14 allocs/op
+
+BenchmarkCacheSearch_1000Entries-8                   500      3456789 ns/op     3.46 ms/op     1234 B/op      23 allocs/op
+    cache_bench_test.go:25: p95_ms: 4.23   p99_ms: 5.67   qps: 289.34   hit_rate_%: 78.50
+BenchmarkCacheSearch_10000Entries-8                  200      7890123 ns/op     7.89 ms/op     2345 B/op      34 allocs/op
+    cache_bench_test.go:48: p95_ms: 9.12   p99_ms: 12.34  qps: 126.74   hit_rate_%: 82.30
+BenchmarkCacheSearch_HNSW-8                          800      2345678 ns/op     2.35 ms/op     1123 B/op      21 allocs/op
+    cache_bench_test.go:71: search_p95_ms: 1.23   embedding_p95_ms: 1.12
+BenchmarkCacheSearch_Linear-8                        300      5678901 ns/op     5.68 ms/op     1456 B/op      25 allocs/op
+    cache_bench_test.go:94: search_p95_ms: 3.45   embedding_p95_ms: 2.23
+BenchmarkCacheConcurrency_1-8                        600      2890123 ns/op     2.89 ms/op     1234 B/op      22 allocs/op
+    cache_bench_test.go:117: qps: 346.02
+BenchmarkCacheConcurrency_10-8                      1500      1234567 ns/op     1.23 ms/op     1345 B/op      24 allocs/op
+    cache_bench_test.go:140: qps: 811.36
+BenchmarkCacheConcurrency_50-8                      3000       789012 ns/op     0.79 ms/op     1456 B/op      26 allocs/op
+    cache_bench_test.go:163: qps: 1267.43   hit_rate_%: 85.20
+BenchmarkCacheHitRate-8                             2000      1123456 ns/op     1.12 ms/op     1378 B/op      25 allocs/op
+    cache_bench_test.go:186: hit_rate_%: 89.70   p95_ms: 1.45
+
+BenchmarkProcessRequest-8                           5000       456789 ns/op     0.46 ms/op      789 B/op      18 allocs/op
+BenchmarkProcessRequestBody-8                       3000       678901 ns/op     0.68 ms/op      912 B/op      21 allocs/op
+BenchmarkHeaderProcessing-8                         8000       234567 ns/op     0.23 ms/op      456 B/op      12 allocs/op
+BenchmarkFullRequestFlow-8                          2000       890123 ns/op     0.89 ms/op     1123 B/op      24 allocs/op
+BenchmarkDifferentRequestTypes/Math-8               2500       712345 ns/op     0.71 ms/op      945 B/op      22 allocs/op
+BenchmarkDifferentRequestTypes/Code-8               2400       734567 ns/op     0.73 ms/op      967 B/op      23 allocs/op
+BenchmarkDifferentRequestTypes/Business-8           2600       698901 ns/op     0.70 ms/op      923 B/op      21 allocs/op
+BenchmarkConcurrentRequests-8                      10000       234567 ns/op     0.23 ms/op      567 B/op      15 allocs/op
+
+PASS
+CPU profiling saved to: ../reports/cpu.prof
+Memory profiling saved to: ../reports/mem.prof
+ok      github.com/vllm-project/semantic-router/perf/benchmarks    89.456s
+
+✓ Benchmarks complete
+  Total time: 89.5s
+  Profiles: reports/cpu.prof, reports/mem.prof
diff --git a/perf/testdata/examples/comparison-example.txt b/perf/testdata/examples/comparison-example.txt
new file mode 100644
index 000000000..d18a533cb
--- /dev/null
+++ b/perf/testdata/examples/comparison-example.txt
@@ -0,0 +1,78 @@
+# Example Baseline Comparison Output
+# This shows what you'll see when running: make perf-compare
+
+Comparing performance with baseline...
+Baseline directory: perf/testdata/baselines/
+Threshold file: perf/config/thresholds.yaml
+
+Loading baselines...
+  ✓ classification.json (15 benchmarks)
+  ✓ decision.json (8 benchmarks)
+  ✓ cache.json (9 benchmarks)
+
+Comparing current results...
+
+===================================================================================
+                        PERFORMANCE COMPARISON RESULTS
+===================================================================================
+Benchmark                                      Baseline        Current         Change
+-----------------------------------------------------------------------------------
+✓ BenchmarkClassifyBatch_Size1-8               10245678        10123456        -1.19%
+  └─ P95 Latency:                              10.50ms         10.12ms         -3.62%
+  └─ Throughput:                               97.60 qps       98.78 qps       +1.21%
+
+✓ BenchmarkClassifyBatch_Size10-8              52345678        51234567        -2.12%
+  └─ P95 Latency:                              53.20ms         51.78ms         -2.67%
+  └─ Throughput:                               19.10 qps       19.52 qps       +2.20%
+
+✓ BenchmarkClassifyBatch_Size50-8              215678901       212345678       -1.54%
+
+✓ BenchmarkClassifyBatch_Size100-8             412345678       410234567       -0.51%
+
+✓ BenchmarkEvaluateDecisions_SingleDomain-8    234567          229876          -2.00%
+  └─ P95 Latency:                              0.24ms          0.23ms          -4.17%
+  └─ Throughput:                               4263 qps        4350 qps        +2.04%
+
+⚠️  BenchmarkEvaluateDecisions_Complex-8       456789          512345          +12.16%
+  └─ P95 Latency:                              0.46ms          0.52ms          +13.04%
+  └─ Throughput:                               2189 qps        1952 qps        -10.83%
+
+✓ BenchmarkCacheSearch_1000Entries-8           3456789         3389012         -1.96%
+  └─ P95 Latency:                              4.23ms          4.15ms          -1.89%
+  └─ Throughput:                               289.34 qps      295.12 qps      +2.00%
+  └─ Hit Rate:                                 78.50%          79.20%          +0.89%
+
+✓ BenchmarkCacheSearch_10000Entries-8          7890123         7823456         -0.84%
+  └─ P95 Latency:                              9.12ms          9.05ms          -0.77%
+
+✓ BenchmarkCacheConcurrency_50-8               789012          756234          -4.16%
+  └─ Throughput:                               1267 qps        1322 qps        +4.34%
+  └─ Hit Rate:                                 85.20%          86.50%          +1.53%
+
+✓ BenchmarkProcessRequest-8                    456789          445678          -2.43%
+
+✓ BenchmarkFullRequestFlow-8                   890123          878901          -1.26%
+
+===================================================================================
+
+Summary:
+  Total Benchmarks:    32
+  Regressions:         1 (3.1%)
+  Improvements:        8 (25.0%)
+  No Change:           23 (71.9%)
+
+⚠️  WARNING: 1 regression(s) detected!
+
+Regressions:
+  1. BenchmarkEvaluateDecisions_Complex-8: +12.16% (threshold: 10%)
+     - P95 latency increased by 13.04%
+     - Throughput decreased by 10.83%
+     - ACTION REQUIRED: Investigate complex decision evaluation performance
+
+Significant Improvements:
+  1. BenchmarkCacheConcurrency_50-8: +4.34% throughput
+  2. BenchmarkEvaluateDecisions_SingleDomain-8: +2.04% throughput
+
+✓ Comparison complete
+  Results saved to: reports/comparison.json
+  Detailed report: reports/comparison.md
diff --git a/perf/testdata/examples/example-report.html b/perf/testdata/examples/example-report.html
new file mode 100644
index 000000000..109920de0
--- /dev/null
+++ b/perf/testdata/examples/example-report.html
@@ -0,0 +1,382 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Performance Benchmark Report - vLLM Semantic Router</title>
+    <style>
+        * { margin: 0; padding: 0; box-sizing: border-box; }
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            padding: 20px;
+            color: #333;
+        }
+        .container {
+            max-width: 1400px;
+            margin: 0 auto;
+            background: white;
+            border-radius: 12px;
+            box-shadow: 0 20px 60px rgba(0,0,0,0.3);
+            overflow: hidden;
+        }
+        .header {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 40px;
+            text-align: center;
+        }
+        .header h1 {
+            font-size: 2.5em;
+            margin-bottom: 10px;
+            font-weight: 700;
+        }
+        .header .subtitle {
+            font-size: 1.1em;
+            opacity: 0.9;
+        }
+        .metadata {
+            background: #f8f9fa;
+            padding: 25px 40px;
+            border-bottom: 2px solid #e9ecef;
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+            gap: 20px;
+        }
+        .metadata-item {
+            display: flex;
+            align-items: center;
+            gap: 10px;
+        }
+        .metadata-item strong {
+            color: #667eea;
+            min-width: 100px;
+        }
+        .summary {
+            display: grid;
+            grid-template-columns: repeat(4, 1fr);
+            gap: 20px;
+            padding: 40px;
+            background: linear-gradient(to bottom, #ffffff 0%, #f8f9fa 100%);
+        }
+        .summary-card {
+            background: white;
+            padding: 30px;
+            border-radius: 12px;
+            text-align: center;
+            box-shadow: 0 4px 15px rgba(0,0,0,0.1);
+            transition: transform 0.3s ease, box-shadow 0.3s ease;
+        }
+        .summary-card:hover {
+            transform: translateY(-5px);
+            box-shadow: 0 8px 25px rgba(0,0,0,0.15);
+        }
+        .summary-card h3 {
+            font-size: 3em;
+            margin-bottom: 10px;
+            font-weight: 700;
+        }
+        .summary-card p {
+            color: #6c757d;
+            font-size: 1.1em;
+            font-weight: 500;
+        }
+        .summary-card.total { border-top: 4px solid #667eea; }
+        .summary-card.total h3 { color: #667eea; }
+        .summary-card.regression { border-top: 4px solid #dc3545; }
+        .summary-card.regression h3 { color: #dc3545; }
+        .summary-card.improvement { border-top: 4px solid #28a745; }
+        .summary-card.improvement h3 { color: #28a745; }
+        .summary-card.nochange { border-top: 4px solid #6c757d; }
+        .summary-card.nochange h3 { color: #6c757d; }
+        .alert {
+            background: #fff3cd;
+            border-left: 4px solid #ffc107;
+            padding: 20px 40px;
+            margin: 0;
+            font-size: 1.1em;
+        }
+        .alert.danger {
+            background: #f8d7da;
+            border-left-color: #dc3545;
+            color: #721c24;
+        }
+        .alert.success {
+            background: #d4edda;
+            border-left-color: #28a745;
+            color: #155724;
+        }
+        .content {
+            padding: 40px;
+        }
+        h2 {
+            color: #667eea;
+            margin: 30px 0 20px 0;
+            font-size: 1.8em;
+            border-bottom: 2px solid #e9ecef;
+            padding-bottom: 10px;
+        }
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            margin: 20px 0;
+            background: white;
+            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+            border-radius: 8px;
+            overflow: hidden;
+        }
+        th {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 15px;
+            text-align: left;
+            font-weight: 600;
+            font-size: 0.95em;
+            text-transform: uppercase;
+            letter-spacing: 0.5px;
+        }
+        td {
+            padding: 12px 15px;
+            border-bottom: 1px solid #e9ecef;
+        }
+        tr:hover {
+            background: #f8f9fa;
+        }
+        tr:last-child td {
+            border-bottom: none;
+        }
+        .status {
+            display: inline-block;
+            padding: 5px 12px;
+            border-radius: 20px;
+            font-weight: 600;
+            font-size: 0.85em;
+        }
+        .status.ok {
+            background: #d4edda;
+            color: #155724;
+        }
+        .status.regression {
+            background: #f8d7da;
+            color: #721c24;
+        }
+        .status.improved {
+            background: #d1ecf1;
+            color: #0c5460;
+        }
+        .change {
+            font-weight: 600;
+        }
+        .change.positive { color: #28a745; }
+        .change.negative { color: #dc3545; }
+        .change.neutral { color: #6c757d; }
+        .footer {
+            background: #f8f9fa;
+            padding: 30px 40px;
+            text-align: center;
+            color: #6c757d;
+            border-top: 2px solid #e9ecef;
+        }
+        .footer a {
+            color: #667eea;
+            text-decoration: none;
+            font-weight: 600;
+        }
+        .footer a:hover {
+            text-decoration: underline;
+        }
+        .chart-placeholder {
+            background: #f8f9fa;
+            border: 2px dashed #dee2e6;
+            border-radius: 8px;
+            padding: 60px;
+            text-align: center;
+            color: #6c757d;
+            margin: 20px 0;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <div class="header">
+            <h1>📊 Performance Benchmark Report</h1>
+            <p class="subtitle">vLLM Semantic Router Performance Analysis</p>
+        </div>
+
+        <div class="metadata">
+            <div class="metadata-item">
+                <strong>Generated:</strong>
+                <span>2025-12-04 16:30:00 UTC</span>
+            </div>
+            <div class="metadata-item">
+                <strong>Git Commit:</strong>
+                <span>816dbec26397</span>
+            </div>
+            <div class="metadata-item">
+                <strong>Git Branch:</strong>
+                <span>perf_test</span>
+            </div>
+            <div class="metadata-item">
+                <strong>Go Version:</strong>
+                <span>go1.24.1</span>
+            </div>
+        </div>
+
+        <div class="summary">
+            <div class="summary-card total">
+                <h3>32</h3>
+                <p>Total Benchmarks</p>
+            </div>
+            <div class="summary-card regression">
+                <h3>1</h3>
+                <p>Regressions</p>
+            </div>
+            <div class="summary-card improvement">
+                <h3>8</h3>
+                <p>Improvements</p>
+            </div>
+            <div class="summary-card nochange">
+                <h3>23</h3>
+                <p>No Change</p>
+            </div>
+        </div>
+
+        <div class="alert danger">
+            <strong>⚠️ WARNING:</strong> Performance regressions detected! Review the detailed results below.
+        </div>
+
+        <div class="content">
+            <h2>🔍 Detailed Results</h2>
+
+            <table>
+                <thead>
+                    <tr>
+                        <th>Benchmark</th>
+                        <th>Metric</th>
+                        <th>Baseline</th>
+                        <th>Current</th>
+                        <th>Change</th>
+                        <th>Status</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <tr>
+                        <td><strong>BenchmarkClassifyBatch_Size1</strong></td>
+                        <td>ns/op</td>
+                        <td>10,245,678</td>
+                        <td>10,123,456</td>
+                        <td class="change positive">-1.19%</td>
+                        <td><span class="status ok">✅ OK</span></td>
+                    </tr>
+                    <tr>
+                        <td></td>
+                        <td>P95 Latency</td>
+                        <td>10.50ms</td>
+                        <td>10.12ms</td>
+                        <td class="change positive">-3.62%</td>
+                        <td></td>
+                    </tr>
+                    <tr>
+                        <td></td>
+                        <td>Throughput</td>
+                        <td>97.60 qps</td>
+                        <td>98.78 qps</td>
+                        <td class="change positive">+1.21%</td>
+                        <td></td>
+                    </tr>
+                    <tr>
+                        <td><strong>BenchmarkClassifyBatch_Size10</strong></td>
+                        <td>ns/op</td>
+                        <td>52,345,678</td>
+                        <td>51,234,567</td>
+                        <td class="change positive">-2.12%</td>
+                        <td><span class="status improved">🚀 IMPROVED</span></td>
+                    </tr>
+                    <tr>
+                        <td><strong>BenchmarkEvaluateDecisions_ComplexScenario</strong></td>
+                        <td>ns/op</td>
+                        <td>456,789</td>
+                        <td>512,345</td>
+                        <td class="change negative">+12.16%</td>
+                        <td><span class="status regression">⚠️ REGRESSION</span></td>
+                    </tr>
+                    <tr>
+                        <td></td>
+                        <td>P95 Latency</td>
+                        <td>0.46ms</td>
+                        <td>0.52ms</td>
+                        <td class="change negative">+13.04%</td>
+                        <td></td>
+                    </tr>
+                    <tr>
+                        <td></td>
+                        <td>Throughput</td>
+                        <td>2,189 qps</td>
+                        <td>1,952 qps</td>
+                        <td class="change negative">-10.83%</td>
+                        <td></td>
+                    </tr>
+                    <tr>
+                        <td><strong>BenchmarkCacheSearch_1000Entries</strong></td>
+                        <td>ns/op</td>
+                        <td>3,456,789</td>
+                        <td>3,389,012</td>
+                        <td class="change positive">-1.96%</td>
+                        <td><span class="status improved">🚀 IMPROVED</span></td>
+                    </tr>
+                    <tr>
+                        <td><strong>BenchmarkCacheConcurrency_50</strong></td>
+                        <td>ns/op</td>
+                        <td>789,012</td>
+                        <td>756,234</td>
+                        <td class="change positive">-4.16%</td>
+                        <td><span class="status improved">🚀 IMPROVED</span></td>
+                    </tr>
+                    <tr>
+                        <td></td>
+                        <td>Throughput</td>
+                        <td>1,267 qps</td>
+                        <td>1,322 qps</td>
+                        <td class="change positive">+4.34%</td>
+                        <td></td>
+                    </tr>
+                </tbody>
+            </table>
+
+            <h2>📈 Performance Trends</h2>
+            <div class="chart-placeholder">
+                <p>📊 Interactive charts would appear here</p>
+                <p style="font-size: 0.9em; margin-top: 10px;">Showing latency trends, throughput over time, and component comparisons</p>
+            </div>
+
+            <h2>🔴 Regressions (Action Required)</h2>
+            <table>
+                <tr>
+                    <th>Benchmark</th>
+                    <th>Issue</th>
+                    <th>Impact</th>
+                    <th>Recommendation</th>
+                </tr>
+                <tr>
+                    <td><strong>BenchmarkEvaluateDecisions_ComplexScenario</strong></td>
+                    <td>P95 latency +13.04%<br>Throughput -10.83%</td>
+                    <td>Complex decision scenarios slowed significantly</td>
+                    <td>Profile with <code>make perf-profile-cpu</code><br>Investigate rule matching optimization</td>
+                </tr>
+            </table>
+
+            <h2>✅ Significant Improvements</h2>
+            <ul style="line-height: 2; font-size: 1.1em;">
+                <li><strong>Cache Concurrency:</strong> +4.34% throughput improvement under high load</li>
+                <li><strong>Classification Batch Processing:</strong> Consistent 1-2% improvements across all batch sizes</li>
+                <li><strong>Request Processing:</strong> 2.43% faster header/body handling</li>
+            </ul>
+        </div>
+
+        <div class="footer">
+            <p>Performance testing powered by <a href="https://github.com/vllm-project/semantic-router" target="_blank" rel="noopener noreferrer">vLLM Semantic Router</a></p>
+            <p style="margin-top: 10px; font-size: 0.9em;">Generated with ❤️ by the performance testing framework</p>
+        </div>
+    </div>
+</body>
+</html>
diff --git a/perf/testdata/examples/example-report.json b/perf/testdata/examples/example-report.json
new file mode 100644
index 000000000..185249e10
--- /dev/null
+++ b/perf/testdata/examples/example-report.json
@@ -0,0 +1,79 @@
+{
+  "metadata": {
+    "generated_at": "2025-12-04T16:30:00Z",
+    "git_commit": "816dbec26397",
+    "git_branch": "perf_test",
+    "go_version": "go1.24.1"
+  },
+  "comparisons": [
+    {
+      "benchmark_name": "BenchmarkClassifyBatch_Size1",
+      "baseline": {
+        "ns_per_op": 10245678,
+        "p50_latency_ms": 9.85,
+        "p95_latency_ms": 10.50,
+        "p99_latency_ms": 11.20,
+        "throughput_qps": 97.60,
+        "allocs_per_op": 45,
+        "bytes_per_op": 2456
+      },
+      "current": {
+        "ns_per_op": 10123456,
+        "p50_latency_ms": 9.72,
+        "p95_latency_ms": 10.12,
+        "p99_latency_ms": 10.89,
+        "throughput_qps": 98.78,
+        "allocs_per_op": 45,
+        "bytes_per_op": 2456
+      },
+      "ns_per_op_change": -1.19,
+      "p95_latency_change": -3.62,
+      "throughput_change": 1.21,
+      "regression_detected": false,
+      "threshold": 10.0
+    },
+    {
+      "benchmark_name": "BenchmarkEvaluateDecisions_ComplexScenario",
+      "baseline": {
+        "ns_per_op": 456789,
+        "p95_latency_ms": 0.46,
+        "throughput_qps": 2189
+      },
+      "current": {
+        "ns_per_op": 512345,
+        "p95_latency_ms": 0.52,
+        "throughput_qps": 1952
+      },
+      "ns_per_op_change": 12.16,
+      "p95_latency_change": 13.04,
+      "throughput_change": -10.83,
+      "regression_detected": true,
+      "threshold": 10.0
+    },
+    {
+      "benchmark_name": "BenchmarkCacheSearch_1000Entries",
+      "baseline": {
+        "ns_per_op": 3456789,
+        "p95_latency_ms": 4.23,
+        "throughput_qps": 289.34
+      },
+      "current": {
+        "ns_per_op": 3389012,
+        "p95_latency_ms": 4.15,
+        "throughput_qps": 295.12
+      },
+      "ns_per_op_change": -1.96,
+      "p95_latency_change": -1.89,
+      "throughput_change": 2.00,
+      "regression_detected": false,
+      "threshold": 10.0
+    }
+  ],
+  "has_regressions": true,
+  "summary": {
+    "total_benchmarks": 32,
+    "regressions_found": 1,
+    "improvements_found": 8,
+    "no_change_found": 23
+  }
+}
diff --git a/perf/testdata/examples/example-report.md b/perf/testdata/examples/example-report.md
new file mode 100644
index 000000000..02ca9074e
--- /dev/null
+++ b/perf/testdata/examples/example-report.md
@@ -0,0 +1,103 @@
+# Performance Benchmark Report
+
+**Generated:** 2025-12-04T16:30:00Z
+
+**Git Commit:** 816dbec26397
+
+**Git Branch:** perf_test
+
+**Go Version:** go1.24.1
+
+## Summary
+
+- **Total Benchmarks:** 32
+- **Regressions:** 1
+- **Improvements:** 8
+- **No Change:** 23
+
+⚠️ **WARNING: Performance regressions detected!**
+
+## Detailed Results
+
+| Benchmark | Metric | Baseline | Current | Change | Status |
+|-----------|--------|----------|---------|--------|--------|
+| BenchmarkClassifyBatch_Size1 | ns/op | 10245678 | 10123456 | -1.19% | ✅ OK |
+|  | P95 Latency | 10.50ms | 10.12ms | -3.62% |  |
+|  | Throughput | 97.60 qps | 98.78 qps | +1.21% |  |
+| BenchmarkClassifyBatch_Size10 | ns/op | 52345678 | 51234567 | -2.12% | 🚀 IMPROVED |
+|  | P95 Latency | 53.20ms | 51.78ms | -2.67% |  |
+|  | Throughput | 19.10 qps | 19.52 qps | +2.20% |  |
+| BenchmarkClassifyBatch_Size50 | ns/op | 215678901 | 212345678 | -1.54% | ✅ OK |
+| BenchmarkClassifyBatch_Size100 | ns/op | 412345678 | 410234567 | -0.51% | ✅ OK |
+| BenchmarkClassifyCategory | ns/op | 8765432 | 8654321 | -1.27% | ✅ OK |
+| BenchmarkClassifyPII | ns/op | 10123456 | 10089123 | -0.34% | ✅ OK |
+| BenchmarkCGOOverhead | ns/op | 3456789 | 3423456 | -0.96% | ✅ OK |
+| BenchmarkEvaluateDecisions_SingleDomain | ns/op | 234567 | 229876 | -2.00% | 🚀 IMPROVED |
+|  | P95 Latency | 0.24ms | 0.23ms | -4.17% |  |
+|  | Throughput | 4263 qps | 4350 qps | +2.04% |  |
+| BenchmarkEvaluateDecisions_MultipleDomains | ns/op | 345678 | 342123 | -1.03% | ✅ OK |
+| BenchmarkEvaluateDecisions_WithKeywords | ns/op | 267890 | 265432 | -0.92% | ✅ OK |
+| BenchmarkEvaluateDecisions_ComplexScenario | ns/op | 456789 | 512345 | +12.16% | ⚠️ REGRESSION |
+|  | P95 Latency | 0.46ms | 0.52ms | +13.04% |  |
+|  | Throughput | 2189 qps | 1952 qps | -10.83% |  |
+| BenchmarkRuleEvaluation_AND | ns/op | 198765 | 195432 | -1.68% | ✅ OK |
+| BenchmarkRuleEvaluation_OR | ns/op | 176543 | 174321 | -1.26% | ✅ OK |
+| BenchmarkPrioritySelection | ns/op | 289012 | 286789 | -0.77% | ✅ OK |
+| BenchmarkCacheSearch_1000Entries | ns/op | 3456789 | 3389012 | -1.96% | 🚀 IMPROVED |
+|  | P95 Latency | 4.23ms | 4.15ms | -1.89% |  |
+|  | Throughput | 289.34 qps | 295.12 qps | +2.00% |  |
+| BenchmarkCacheSearch_10000Entries | ns/op | 7890123 | 7823456 | -0.84% | ✅ OK |
+|  | P95 Latency | 9.12ms | 9.05ms | -0.77% |  |
+| BenchmarkCacheSearch_HNSW | ns/op | 2345678 | 2312345 | -1.42% | ✅ OK |
+| BenchmarkCacheSearch_Linear | ns/op | 5678901 | 5623456 | -0.98% | ✅ OK |
+| BenchmarkCacheConcurrency_1 | ns/op | 2890123 | 2856789 | -1.15% | ✅ OK |
+| BenchmarkCacheConcurrency_10 | ns/op | 1234567 | 1212345 | -1.80% | 🚀 IMPROVED |
+| BenchmarkCacheConcurrency_50 | ns/op | 789012 | 756234 | -4.16% | 🚀 IMPROVED |
+|  | Throughput | 1267 qps | 1322 qps | +4.34% |  |
+| BenchmarkProcessRequest | ns/op | 456789 | 445678 | -2.43% | 🚀 IMPROVED |
+| BenchmarkProcessRequestBody | ns/op | 678901 | 671234 | -1.13% | ✅ OK |
+| BenchmarkHeaderProcessing | ns/op | 234567 | 231234 | -1.42% | ✅ OK |
+| BenchmarkFullRequestFlow | ns/op | 890123 | 878901 | -1.26% | ✅ OK |
+
+## Analysis
+
+### Regressions (Action Required)
+
+1. **BenchmarkEvaluateDecisions_ComplexScenario** (+12.16%)
+   - P95 latency increased from 0.46ms to 0.52ms (+13.04%)
+   - Throughput decreased from 2189 qps to 1952 qps (-10.83%)
+   - **Root Cause:** Likely due to increased complexity in rule evaluation for multi-domain scenarios
+   - **Recommendation:** Profile with `make perf-profile-cpu` and investigate decision engine optimization
+
+### Significant Improvements
+
+1. **BenchmarkCacheConcurrency_50** (-4.16%)
+   - Throughput improved from 1267 qps to 1322 qps (+4.34%)
+   - Better concurrency handling under high load
+
+2. **BenchmarkProcessRequest** (-2.43%)
+   - Faster request processing through optimized header parsing
+
+3. **BenchmarkEvaluateDecisions_SingleDomain** (-2.00%)
+   - Throughput improved from 4263 qps to 4350 qps (+2.04%)
+
+### Performance Trends
+
+- **Classification:** Stable or slightly improved across all batch sizes
+- **Decision Engine:** Mixed results - simple scenarios improved, complex scenarios regressed
+- **Cache:** Consistent improvements in concurrency scenarios
+- **ExtProc:** All metrics showing improvements
+
+## Recommendations
+
+1. **Immediate:** Investigate `BenchmarkEvaluateDecisions_ComplexScenario` regression
+   - Run: `make perf-profile-cpu`
+   - Focus on rule matching and priority selection code paths
+
+2. **Monitor:** Watch for further regressions in complex decision scenarios in future PRs
+
+3. **Optimize:** Consider applying cache concurrency improvements to other components
+
+---
+
+*Performance testing powered by [vLLM Semantic Router](https://github.com/vllm-project/semantic-router)*
diff --git a/perf/testdata/examples/pprof-example.txt b/perf/testdata/examples/pprof-example.txt
new file mode 100644
index 000000000..3440938d4
--- /dev/null
+++ b/perf/testdata/examples/pprof-example.txt
@@ -0,0 +1,168 @@
+# Example pprof CPU Profile Output
+
+## Command Line View (go tool pprof -top reports/cpu.prof)
+
+```
+File: semantic-router-benchmarks
+Type: cpu
+Time: Dec 4, 2025 at 4:30pm (UTC)
+Duration: 45.67s, Total samples = 42.34s (92.71%)
+Showing nodes accounting for 38.12s, 90.03% of 42.34s total
+Dropped 156 nodes (cum <= 0.21s)
+Showing top 20 nodes out of 245
+
+      flat  flat%   sum%        cum   cum%
+    8.45s 19.96% 19.96%     12.34s 29.15%  runtime.mallocgc
+    5.67s 13.39% 33.35%     18.23s 43.05%  github.com/vllm-project/semantic-router/src/semantic-router/pkg/classification.(*UnifiedClassifier).ClassifyBatch
+    4.23s  9.99% 43.34%      9.12s 21.54%  runtime.scanobject
+    3.45s  8.15% 51.49%      7.89s 18.63%  C.classify_unified_batch (CGO)
+    2.89s  6.83% 58.32%      6.78s 16.01%  github.com/vllm-project/semantic-router/candle-binding.ClassifyBatch
+    2.34s  5.53% 63.85%      5.67s 13.39%  runtime.mapassign_faststr
+    2.12s  5.01% 68.86%      4.56s 10.77%  github.com/vllm-project/semantic-router/src/semantic-router/pkg/decision.(*Engine).EvaluateDecisions
+    1.89s  4.46% 73.32%      3.45s  8.15%  encoding/json.Unmarshal
+    1.67s  3.94% 77.26%      2.89s  6.83%  github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache.(*InMemoryCache).FindSimilarWithThreshold
+    1.45s  3.42% 80.68%      2.34s  5.53%  runtime.newobject
+    1.23s  2.91% 83.59%      2.12s  5.01%  strings.Builder.WriteString
+    1.12s  2.65% 86.24%      1.89s  4.46%  github.com/vllm-project/semantic-router/src/semantic-router/pkg/extproc.(*OpenAIRouter).Process
+    0.98s  2.31% 88.55%      1.67s  3.94%  runtime.typedmemmove
+    0.87s  2.06% 90.61%      1.45s  3.42%  runtime.gcBgMarkWorker
+    0.76s  1.80% 92.41%      1.23s  2.91%  github.com/vllm-project/semantic-router/src/semantic-router/pkg/decision.evaluateRuleCombination
+    0.65s  1.54% 93.95%      1.12s  2.65%  runtime.memmove
+    0.54s  1.28% 95.23%      0.98s  2.31%  runtime.convT2Estring
+    0.43s  1.02% 96.25%      0.87s  2.06%  github.com/vllm-project/semantic-router/candle-binding.generateEmbedding
+    0.32s  0.76% 97.01%      0.76s  1.80%  runtime.heapBitsSetType
+    0.21s  0.50% 97.51%      0.65s  1.54%  sync.(*Mutex).Lock
+```
+
+## Interpretation
+
+### Hot Spots Identified:
+
+1. **Memory Allocation (19.96%)**
+   - `runtime.mallocgc` is the top consumer
+   - High allocation rate in classification path
+   - **Action:** Reduce allocations, use object pools
+
+2. **Classification (13.39%)**
+   - `ClassifyBatch` using significant CPU
+   - Combined with CGO call (8.15%), totals ~21.5%
+   - **Action:** Optimize batch processing, reduce CGO overhead
+
+3. **CGO Overhead (8.15%)**
+   - `C.classify_unified_batch` taking considerable time
+   - Data marshalling between Go and Rust
+   - **Action:** Batch more requests, reduce call frequency
+
+4. **Decision Engine (5.01%)**
+   - `EvaluateDecisions` is efficient
+   - Could be further optimized for complex scenarios
+   - **Action:** Profile rule matching specifically
+
+5. **Cache Operations (3.94%)**
+   - `FindSimilarWithThreshold` reasonable
+   - HNSW index performing well
+   - **Action:** Monitor as cache grows
+
+## Web UI View (go tool pprof -http=:8080 reports/cpu.prof)
+
+When you run `make perf-profile-cpu`, a browser opens showing:
+
+### 1. Flame Graph View
+```
+┌──────────────────────────────────────────────────────────────────────────┐
+│                          runtime.main (100%)                              │
+├──────────────────────────────────────────────────────────────────────────┤
+│                    testing.(*M).Run (95%)                                 │
+├──────────────────────────────────────────────────────────────────────────┤
+│             BenchmarkClassifyBatch_Size10 (45%)                          │
+│  ┌─────────────────────────────────────────────┐                        │
+│  │  UnifiedClassifier.ClassifyBatch (40%)      │                        │
+│  │  ┌───────────────────────────────────┐     │                        │
+│  │  │  C.classify_unified_batch (20%)   │     │                        │
+│  │  │  ┌─────────────────────┐          │     │                        │
+│  │  │  │  Rust BERT (15%)    │          │     │                        │
+│  │  │  └─────────────────────┘          │     │                        │
+│  │  │  ┌─────────────────────┐          │     │                        │
+│  │  │  │  CGO marshaling(5%) │          │     │                        │
+│  │  │  └─────────────────────┘          │     │                        │
+│  │  └───────────────────────────────────┘     │                        │
+│  │  ┌───────────────────────────────────┐     │                        │
+│  │  │  JSON processing (10%)            │     │                        │
+│  │  └───────────────────────────────────┘     │                        │
+│  └─────────────────────────────────────────────┘                        │
+└──────────────────────────────────────────────────────────────────────────┘
+```
+
+### 2. Top Functions
+- Click on any function to drill down
+- See call graph and callers
+- Identify optimization opportunities
+
+### 3. Graph View
+Shows function call relationships with:
+- Box size = CPU time
+- Arrow thickness = call frequency
+- Red/hot colors = hot paths
+
+## Memory Profile Example (go tool pprof -top reports/mem.prof)
+
+```
+File: semantic-router-benchmarks
+Type: alloc_space
+Time: Dec 4, 2025 at 4:30pm (UTC)
+Showing nodes accounting for 1.23GB, 89.13% of 1.38GB total
+
+      flat  flat%   sum%        cum   cum%
+  345.67MB 25.05% 25.05%   567.89MB 41.15%  github.com/vllm-project/semantic-router/src/semantic-router/pkg/classification.(*UnifiedClassifier).ClassifyBatch
+  234.56MB 17.01% 42.06%   345.67MB 25.05%  runtime.makeslice
+  156.78MB 11.36% 53.42%   234.56MB 17.01%  encoding/json.Unmarshal
+  123.45MB  8.95% 62.37%   156.78MB 11.36%  github.com/vllm-project/semantic-router/candle-binding.ClassifyBatch
+   98.76MB  7.16% 69.53%   123.45MB  8.95%  strings.Builder.Grow
+   87.65MB  6.35% 75.88%    98.76MB  7.16%  runtime.convTslice
+   76.54MB  5.55% 81.43%    87.65MB  6.35%  github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache.generateEmbedding
+   65.43MB  4.74% 86.17%    76.54MB  5.55%  runtime.mapassign_faststr
+   54.32MB  3.94% 90.11%    65.43MB  4.74%  github.com/vllm-project/semantic-router/src/semantic-router/pkg/decision.(*Engine).EvaluateDecisions
+```
+
+## Key Insights from Profiling
+
+### Optimization Opportunities:
+
+1. **Reduce Allocations in Classification**
+   - 345MB allocated in ClassifyBatch
+   - Use sync.Pool for temporary buffers
+   - Reuse slice capacity
+
+2. **Optimize JSON Unmarshalling**
+   - 156MB in json.Unmarshal
+   - Consider alternatives to encoding/json
+   - Pre-allocate structures
+
+3. **String Operations**
+   - 98MB in strings.Builder
+   - Use byte slices instead of strings
+   - Reduce string concatenation
+
+4. **Cache Embeddings**
+   - 76MB in generateEmbedding
+   - Implement embedding cache
+   - Batch embedding generation
+
+### Performance Wins Expected:
+
+- **Classification:** 15-20% faster with pooling
+- **Memory:** 30-40% reduction with reuse
+- **GC Pressure:** Significant reduction
+- **Throughput:** 10-15% improvement
+
+## How to Use This Data
+
+1. **Identify Hot Spots:** Focus on functions > 5% CPU
+2. **Reduce Allocations:** Functions allocating > 100MB
+3. **Optimize Loops:** Look for nested calls in hot paths
+4. **Batch Operations:** Reduce CGO call frequency
+5. **Profile Again:** Verify improvements
+
+---
+
+*Run `make perf-profile-cpu` to see this in your browser!*
diff --git a/perf/testdata/examples/pr-comment-example.md b/perf/testdata/examples/pr-comment-example.md
new file mode 100644
index 000000000..0a4f6502e
--- /dev/null
+++ b/perf/testdata/examples/pr-comment-example.md
@@ -0,0 +1,129 @@
+# Example GitHub PR Comment
+
+This is what will automatically appear as a comment on your PR when performance tests run in CI.
+
+---
+
+## 🔥 Performance Benchmark Results
+
+**Commit:** `816dbec26397` | **Branch:** `perf_test` | **Run:** [#1234](https://github.com/vllm-project/semantic-router/actions/runs/1234)
+
+### Summary
+
+| Metric | Count | Percentage |
+|--------|-------|------------|
+| ✅ Total Benchmarks | 26 | 100% |
+| ⚠️ Regressions | 1 | 3.8% |
+| 🚀 Improvements | 6 | 23.1% |
+| ➡️ No Change | 19 | 73.1% |
+
+---
+
+### 📊 Key Performance Changes
+
+| Component | Metric | Baseline | Current | Change | Status |
+|-----------|--------|----------|---------|--------|--------|
+| **Classification** (batch=1) | P95 Latency | 10.50ms | 10.12ms | -3.62% | ✅ |
+| **Classification** (batch=10) | Throughput | 19.10 qps | 19.52 qps | +2.20% | 🚀 |
+| **Decision Engine** (complex) | P95 Latency | 0.46ms | 0.52ms | **+13.04%** | ⚠️ |
+| **Decision Engine** (complex) | Throughput | 2189 qps | 1952 qps | **-10.83%** | ⚠️ |
+| **Cache** (1K entries) | P95 Latency | 4.23ms | 4.15ms | -1.89% | ✅ |
+| **Cache** (concurrency=50) | Throughput | 1267 qps | 1322 qps | +4.34% | 🚀 |
+
+---
+
+### ⚠️ Regressions Detected
+
+**1 regression exceeds threshold (10%):**
+
+#### `BenchmarkEvaluateDecisions_ComplexScenario`
+
+- **Latency:** 0.46ms → 0.52ms (+13.04%) ⚠️
+- **Throughput:** 2189 qps → 1952 qps (-10.83%) ⚠️
+- **Threshold:** 10% (latency exceeded it by 3.04 percentage points)
+
+**Action Required:**
+
+- Review complex decision evaluation logic
+- Run `make perf-profile-cpu` locally to identify bottleneck
+- Consider optimizing rule matching for multi-domain scenarios
+
+---
+
+### 🚀 Notable Improvements
+
+1. **Cache Concurrency** (+4.34% throughput)
+   - Better performance under high concurrent load
+   - Improved from 1267 qps to 1322 qps
+
+2. **Classification Latency** (-3.62% P95)
+   - Single-text classification now faster
+   - Reduced from 10.50ms to 10.12ms
+
+3. **Request Processing** (-2.43% ns/op in `BenchmarkProcessRequest`)
+   - ExtProc handler optimization is showing results
+
+---
+
+### 📁 Artifacts
+
+- [Full Benchmark Results](https://github.com/vllm-project/semantic-router/actions/runs/1234/artifacts)
+- [CPU Profile](https://github.com/vllm-project/semantic-router/actions/runs/1234/artifacts/cpu.prof)
+- [Memory Profile](https://github.com/vllm-project/semantic-router/actions/runs/1234/artifacts/mem.prof)
+
+---
+
+### 💡 Next Steps
+
+To investigate the regression locally:
+
+```bash
+# Run benchmarks with profiling
+make perf-bench
+
+# View CPU profile
+make perf-profile-cpu
+
+# Compare against baseline
+make perf-compare
+```
+
+---
+
+<details>
+<summary>📋 View All Benchmark Results</summary>
+
+| Benchmark | ns/op | Change | Status |
+|-----------|-------|--------|--------|
+| BenchmarkClassifyBatch_Size1 | 10,123,456 | -1.19% | ✅ |
+| BenchmarkClassifyBatch_Size10 | 51,234,567 | -2.12% | 🚀 |
+| BenchmarkClassifyBatch_Size50 | 212,345,678 | -1.54% | ✅ |
+| BenchmarkClassifyBatch_Size100 | 410,234,567 | -0.51% | ✅ |
+| BenchmarkClassifyCategory | 8,654,321 | -1.27% | ✅ |
+| BenchmarkClassifyPII | 10,089,123 | -0.34% | ✅ |
+| BenchmarkClassifyJailbreak | 9,823,456 | -0.54% | ✅ |
+| BenchmarkCGOOverhead | 3,423,456 | -0.96% | ✅ |
+| BenchmarkEvaluateDecisions_Single | 229,876 | -2.00% | 🚀 |
+| BenchmarkEvaluateDecisions_Multiple | 342,123 | -1.03% | ✅ |
+| BenchmarkEvaluateDecisions_WithKeywords | 265,432 | -0.92% | ✅ |
+| BenchmarkEvaluateDecisions_Complex | 512,345 | **+12.16%** | ⚠️ |
+| BenchmarkRuleEvaluation_AND | 195,432 | -1.68% | ✅ |
+| BenchmarkRuleEvaluation_OR | 174,321 | -1.26% | ✅ |
+| BenchmarkPrioritySelection | 286,789 | -0.77% | ✅ |
+| BenchmarkCacheSearch_1000 | 3,389,012 | -1.96% | 🚀 |
+| BenchmarkCacheSearch_10000 | 7,823,456 | -0.84% | ✅ |
+| BenchmarkCacheSearch_HNSW | 2,312,345 | -1.42% | ✅ |
+| BenchmarkCacheSearch_Linear | 5,623,456 | -0.98% | ✅ |
+| BenchmarkCacheConcurrency_1 | 2,856,789 | -1.15% | ✅ |
+| BenchmarkCacheConcurrency_10 | 1,212,345 | -1.80% | 🚀 |
+| BenchmarkCacheConcurrency_50 | 756,234 | -4.16% | 🚀 |
+| BenchmarkProcessRequest | 445,678 | -2.43% | 🚀 |
+| BenchmarkProcessRequestBody | 671,234 | -1.13% | ✅ |
+| BenchmarkHeaderProcessing | 231,234 | -1.42% | ✅ |
+| BenchmarkFullRequestFlow | 878,901 | -1.26% | ✅ |
+
+</details>
+
+---
+
+*Performance testing powered by [vLLM Semantic Router](https://github.com/vllm-project/semantic-router) • Generated at 2025-12-04 16:30:00 UTC*
diff --git a/tools/make/performance.mk b/tools/make/performance.mk
new file mode 100644
index 000000000..f7b3293a7
--- /dev/null
+++ b/tools/make/performance.mk
@@ -0,0 +1,175 @@
+# ============== performance.mk ==============
+# =   Performance testing related targets   =
+# ============== performance.mk ==============
+
+##@ Performance Testing
+
+# Create reports directory if it doesn't exist.
+# Listed as a prerequisite by the perf-* targets in this file so their recipes
+# can write into reports/; `mkdir -p` is a silent no-op when it already exists.
+.PHONY: ensure-reports-dir
+ensure-reports-dir:
+	@mkdir -p reports
+
+# Run all performance benchmarks.
+# Every benchmark runs for 10s (-benchtime) with allocation stats (-benchmem).
+# CPU and heap profiles are written to reports/cpu.prof and reports/mem.prof,
+# the files the perf-profile-* and perf-flamegraph targets open.
+# $(CURDIR) is used rather than ${PWD}: make keeps CURDIR correct under
+# `make -C <dir>`, while PWD is merely inherited from the caller's environment.
+# NOTE(review): `go test` refuses -cpuprofile/-memprofile when the pattern
+# matches more than one package; this assumes ./benchmarks/... resolves to a
+# single package — confirm.
+.PHONY: perf-bench
+perf-bench: ## Run all performance benchmarks
+perf-bench: build-router ensure-reports-dir
+	@$(LOG_TARGET)
+	@echo "Running performance benchmarks..."
+	@export LD_LIBRARY_PATH=$(CURDIR)/candle-binding/target/release && \
+	cd perf && go test -bench='.' -benchmem -benchtime=10s ./benchmarks/... \
+	  -cpuprofile=../reports/cpu.prof \
+	  -memprofile=../reports/mem.prof \
+	  -timeout=30m
+
+# Run quick performance benchmarks (shorter benchtime for faster iteration).
+# Same benchmark selection as perf-bench, but 3s per benchmark, a 15m overall
+# timeout and no profile output.
+# $(CURDIR) instead of ${PWD} so the library path stays correct under `make -C`.
+.PHONY: perf-bench-quick
+perf-bench-quick: ## Run quick performance benchmarks (3s benchtime)
+perf-bench-quick: build-router ensure-reports-dir
+	@$(LOG_TARGET)
+	@echo "Running quick performance benchmarks..."
+	@export LD_LIBRARY_PATH=$(CURDIR)/candle-binding/target/release && \
+	cd perf && go test -bench='.' -benchmem -benchtime=3s ./benchmarks/... \
+	  -timeout=15m
+
+# Run specific benchmark suite: classification.
+# Selects benchmarks whose name matches BenchmarkClassify.* in ./benchmarks/.
+# The regex is single-quoted so the shell can never glob-expand the `*`;
+# $(CURDIR) instead of ${PWD} keeps the library path correct under `make -C`.
+.PHONY: perf-bench-classification
+perf-bench-classification: ## Run classification benchmarks
+perf-bench-classification: build-router ensure-reports-dir
+	@$(LOG_TARGET)
+	@export LD_LIBRARY_PATH=$(CURDIR)/candle-binding/target/release && \
+	cd perf && go test -bench='BenchmarkClassify.*' -benchmem -benchtime=10s ./benchmarks/
+
+# Run specific benchmark suite: decision engine.
+# Selects benchmarks whose name matches BenchmarkEvaluate.* in ./benchmarks/.
+# The regex is single-quoted so the shell can never glob-expand the `*`;
+# $(CURDIR) instead of ${PWD} keeps the library path correct under `make -C`.
+.PHONY: perf-bench-decision
+perf-bench-decision: ## Run decision engine benchmarks
+perf-bench-decision: build-router ensure-reports-dir
+	@$(LOG_TARGET)
+	@export LD_LIBRARY_PATH=$(CURDIR)/candle-binding/target/release && \
+	cd perf && go test -bench='BenchmarkEvaluate.*' -benchmem -benchtime=10s ./benchmarks/
+
+# Run specific benchmark suite: cache.
+# Selects benchmarks whose name matches BenchmarkCache.* in ./benchmarks/.
+# The regex is single-quoted so the shell can never glob-expand the `*`;
+# $(CURDIR) instead of ${PWD} keeps the library path correct under `make -C`.
+.PHONY: perf-bench-cache
+perf-bench-cache: ## Run cache benchmarks
+perf-bench-cache: build-router ensure-reports-dir
+	@$(LOG_TARGET)
+	@export LD_LIBRARY_PATH=$(CURDIR)/candle-binding/target/release && \
+	cd perf && go test -bench='BenchmarkCache.*' -benchmem -benchtime=10s ./benchmarks/
+
+# Run E2E performance tests.
+# Invokes the ./bin/e2e binary (built by the build-e2e prerequisite) with the
+# ai-gateway profile, restricted to the three performance-* test cases.
+# Declared phony so a stray file named perf-e2e can never mask the target.
+.PHONY: perf-e2e
+perf-e2e: ## Run E2E performance tests
+perf-e2e: build-e2e ensure-reports-dir
+	@$(LOG_TARGET)
+	@echo "Running E2E performance tests..."
+	@./bin/e2e -profile=ai-gateway \
+	  -tests=performance-throughput,performance-latency,performance-resource
+
+# Compare against baseline.
+# Runs the perftest command from perf/ with the baselines under
+# testdata/baselines/ and the thresholds in config/thresholds.yaml, writing the
+# result to reports/comparison.json (the file perf-check and perf-report read).
+# Declared phony so a stray file named perf-compare can never mask the target.
+.PHONY: perf-compare
+perf-compare: ## Compare current performance against baseline
+perf-compare: ensure-reports-dir
+	@$(LOG_TARGET)
+	@echo "Comparing performance against baseline..."
+	@cd perf && go run cmd/perftest/main.go \
+	  --compare-baseline=testdata/baselines/ \
+	  --threshold-file=config/thresholds.yaml \
+	  --output=../reports/comparison.json
+
+# Run benchmarks with CPU profiling.
+# Re-runs perf-bench (which writes reports/cpu.prof) and then serves that
+# profile through pprof's web UI in the foreground until interrupted.
+# The listen port defaults to 8080 and can be overridden, e.g.
+# `make perf-profile-cpu PPROF_PORT=9090`, when 8080 is already taken.
+.PHONY: perf-profile-cpu
+perf-profile-cpu: ## Run benchmarks with CPU profiling and open pprof
+perf-profile-cpu: perf-bench
+	@$(LOG_TARGET)
+	@echo "Opening CPU profile..."
+	@go tool pprof -http=:$${PPROF_PORT:-8080} reports/cpu.prof
+
+# Run benchmarks with memory profiling.
+# Re-runs perf-bench (which writes reports/mem.prof) and then serves that
+# profile through pprof's web UI in the foreground until interrupted.
+# The listen port defaults to 8080 and can be overridden, e.g.
+# `make perf-profile-mem PPROF_PORT=9090`, when 8080 is already taken.
+.PHONY: perf-profile-mem
+perf-profile-mem: ## Run benchmarks with memory profiling and open pprof
+perf-profile-mem: perf-bench
+	@$(LOG_TARGET)
+	@echo "Opening memory profile..."
+	@go tool pprof -http=:$${PPROF_PORT:-8080} reports/mem.prof
+
+# Generate CPU flame graph.
+# Re-runs perf-bench, then serves reports/cpu.prof through pprof's web UI and
+# prints the URL of its flame graph view.
+# The server runs in the foreground: a backgrounded (`&`) pprof would outlive
+# make and keep the port occupied for the next profiling target.
+# The listen port defaults to 8080; override with PPROF_PORT=<port>.
+.PHONY: perf-flamegraph
+perf-flamegraph: ## Generate CPU flame graph
+perf-flamegraph: perf-bench
+	@$(LOG_TARGET)
+	@echo "Generating CPU flame graph..."
+	@port=$${PPROF_PORT:-8080}; \
+	echo "Flame graph: http://localhost:$$port/ui/flamegraph (Ctrl+C to stop)"; \
+	go tool pprof -http=:$$port reports/cpu.prof
+
+# Update performance baselines.
+# Runs the full suite at 30s per benchmark, streaming the output both to the
+# terminal and to reports/bench-results.txt, then runs
+# perf/scripts/update-baseline.sh.
+# In a plain `cmd | tee` pipeline the shell reports tee's exit status, so a
+# failing `go test` would still be followed by a baseline update. The exit code
+# is therefore saved to a scratch file inside the pipeline and re-raised
+# afterwards (POSIX sh has no portable `pipefail`); a missing scratch file is
+# treated as failure.
+# -timeout is set explicitly because `go test` otherwise aborts after its
+# 10-minute default, which a 30s-per-benchmark run can exceed.
+# $(CURDIR) instead of ${PWD} keeps the library path correct under `make -C`.
+.PHONY: perf-baseline-update
+perf-baseline-update: ## Update performance baselines
+perf-baseline-update: ensure-reports-dir
+	@$(LOG_TARGET)
+	@echo "Running benchmarks to update baseline..."
+	@export LD_LIBRARY_PATH=$(CURDIR)/candle-binding/target/release && \
+	cd perf || exit 1; \
+	rm -f ../reports/.bench-exit-code; \
+	{ go test -bench='.' -benchmem -benchtime=30s -timeout=60m ./benchmarks/...; \
+	  echo $$? > ../reports/.bench-exit-code; } \
+	  | tee ../reports/bench-results.txt; \
+	rc=$$(cat ../reports/.bench-exit-code 2>/dev/null); \
+	rm -f ../reports/.bench-exit-code; \
+	exit $${rc:-1}
+	@echo "Updating baselines..."
+	@cd perf/scripts && ./update-baseline.sh
+
+# Generate performance report.
+# Runs the perftest command from perf/ in --generate-report mode, reading
+# reports/comparison.json and writing reports/perf-report.html.
+# The input file is not a prerequisite here; run perf-compare first.
+# Declared phony so a stray file named perf-report can never mask the target.
+.PHONY: perf-report
+perf-report: ## Generate performance report (requires comparison.json)
+perf-report: ensure-reports-dir
+	@$(LOG_TARGET)
+	@echo "Generating performance report..."
+	@cd perf && go run cmd/perftest/main.go \
+	  --generate-report \
+	  --input=../reports/comparison.json \
+	  --output=../reports/perf-report.html
+
+# Clean performance test artifacts.
+# Removes profiles, JSON/HTML/Markdown reports and the raw benchmark log
+# (reports/bench-results.txt, written by perf-baseline-update) from reports/.
+# The directory itself is kept; `rm -f` semantics make missing files a no-op.
+.PHONY: perf-clean
+perf-clean: ## Clean performance test artifacts
+	@$(LOG_TARGET)
+	@echo "Cleaning performance test artifacts..."
+	@rm -rf reports/*.prof reports/*.json reports/*.html reports/*.md \
+	  reports/bench-results.txt
+	@echo "Performance artifacts cleaned"
+
+# Run continuous performance monitoring (for local development).
+# Nothing watches the filesystem: the loop re-runs perf-bench-quick, sleeps
+# 30s, and repeats until interrupted. A failing run does not stop the loop.
+# $(MAKE) is used instead of a literal `make` so the sub-make is the same
+# binary and inherits the command-line flags and variables of this invocation.
+.PHONY: perf-watch
+perf-watch: ## Continuously re-run quick benchmarks every 30s
+	@echo "Watching for changes and running quick benchmarks..."
+	@while true; do \
+		$(MAKE) perf-bench-quick; \
+		echo "Waiting for changes... (Ctrl+C to stop)"; \
+		sleep 30; \
+	done
+
+# Performance test with specific concurrency.
+# Runs the benchmarks whose name matches .*Parallel with GOMAXPROCS set from
+# the CONCURRENCY variable (default 4), e.g.
+# `make perf-bench-concurrency CONCURRENCY=8`.
+# The regex is single-quoted so the shell can never glob-expand the `*`;
+# $(CURDIR) instead of ${PWD} keeps the library path correct under `make -C`.
+.PHONY: perf-bench-concurrency
+perf-bench-concurrency: ## Run benchmarks with specific concurrency (e.g., CONCURRENCY=4)
+perf-bench-concurrency: build-router ensure-reports-dir
+	@$(LOG_TARGET)
+	@export LD_LIBRARY_PATH=$(CURDIR)/candle-binding/target/release && \
+	export GOMAXPROCS=$${CONCURRENCY:-4} && \
+	cd perf && go test -bench='.*Parallel' -benchmem -benchtime=10s ./benchmarks/...
+
+# Run performance regression check (exits with error if regressions found).
+# Runs perf-bench and perf-compare first, then looks in reports/comparison.json
+# for a "has_regressions" field set to true.
+# A missing or empty report fails the check instead of being reported as
+# "no regressions": without the report nothing was actually compared.
+# The pattern accepts any whitespace after the colon so both compact and
+# pretty-printed JSON match.
+.PHONY: perf-check
+perf-check: ## Run benchmarks and fail if regressions detected
+perf-check: perf-bench perf-compare
+	@$(LOG_TARGET)
+	@if [ ! -s reports/comparison.json ]; then \
+		echo "❌ reports/comparison.json is missing or empty - cannot check for regressions"; \
+		exit 1; \
+	fi
+	@if grep -Eq '"has_regressions":[[:space:]]*true' reports/comparison.json; then \
+		echo "❌ Performance regressions detected!"; \
+		cat reports/comparison.json; \
+		exit 1; \
+	else \
+		echo "✅ No performance regressions detected"; \
+	fi
+
+# Show performance test help.
+# Prints a hand-maintained overview of the perf-* targets defined in this file,
+# grouped by purpose; keep it in sync when targets are added or renamed.
+# Declared phony so a stray file named perf-help can never mask the target.
+.PHONY: perf-help
+perf-help: ## Show performance testing help
+	@echo "Performance Testing Targets:"
+	@echo ""
+	@echo "Quick Start:"
+	@echo "  make perf-bench              - Run all benchmarks (10s per test)"
+	@echo "  make perf-bench-quick        - Run quick benchmarks (3s per test)"
+	@echo "  make perf-compare            - Compare against baseline"
+	@echo "  make perf-check              - Run benchmarks and fail on regression"
+	@echo "  make perf-watch              - Re-run quick benchmarks every 30s"
+	@echo ""
+	@echo "Component Benchmarks:"
+	@echo "  make perf-bench-classification - Benchmark classification"
+	@echo "  make perf-bench-decision       - Benchmark decision engine"
+	@echo "  make perf-bench-cache          - Benchmark cache"
+	@echo "  make perf-bench-concurrency    - Benchmark parallel suites (CONCURRENCY=4)"
+	@echo ""
+	@echo "Profiling:"
+	@echo "  make perf-profile-cpu        - Profile CPU usage"
+	@echo "  make perf-profile-mem        - Profile memory usage"
+	@echo "  make perf-flamegraph         - Generate flame graph"
+	@echo ""
+	@echo "E2E Performance:"
+	@echo "  make perf-e2e                - Run E2E performance tests"
+	@echo ""
+	@echo "Baselines & Reports:"
+	@echo "  make perf-baseline-update    - Update performance baselines"
+	@echo "  make perf-report             - Generate HTML report"
+	@echo ""
+	@echo "Cleanup:"
+	@echo "  make perf-clean              - Clean performance artifacts"