kubernetes-sigs · Frapschen · Sep 26, 2025 · Sep 29, 2025
diff --git a/Makefile b/Makefile
@@ -142,6 +142,10 @@ test-unit: ## Run unit tests.
 	go tool cover -func=cover.out; \
 	rm cover.out
 
+.PHONY: test-benchmark
+test-benchmark: ## Run benchmarks.
+	CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./pkg/... -bench=. -benchmem;
+
 .PHONY: test-integration
 test-integration: envtest ## Run integration tests.
 	CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration/epp/... -race -coverprofile cover.out

diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go
@@ -19,7 +19,6 @@ package prefix
 import (
 	"context"
 	"fmt"
-	"math"
 	"math/rand"
 	"strings"
 	"testing"
@@ -357,49 +356,44 @@ func BenchmarkPrefixPluginStress(b *testing.B) {
 	plugin := New(context.Background(), config)
 	types.NewCycleState()
 	var promptLen []int
-	for i := 1; i <= 1024; i++ {
+	for i := 1; i <= 1024; {
 		promptLen = append(promptLen, i)
+		i += 10
 	}
 	promptLen = append(promptLen, 2048, 4096, 8192, 10000, 20000, 50000)
 
-	for _, i := range promptLen {
-		// Generate increasing-length random prompts
-		prompt := randomPrompt(4 + i)
-		pod := &types.PodMetrics{
-			Pod: &backend.Pod{
-				NamespacedName: k8stypes.NamespacedName{
-					Name: fmt.Sprintf("random-pod-%d", i),
+	for i, v := range promptLen {
+		b.Run(fmt.Sprintf("messages_%d_length_%d", i, v), func(b *testing.B) {
+			// Generate increasing-length random prompts
+			prompt := randomPrompt(4 + v)
+			pod := &types.PodMetrics{
+				Pod: &backend.Pod{
+					NamespacedName: k8stypes.NamespacedName{
+						Name: fmt.Sprintf("random-pod-%d", v),
+					},
 				},
-			},
-		}
-
-		pods := []types.Pod{pod}
-		req := &types.LLMRequest{
-			RequestId:   uuid.NewString(),
-			TargetModel: "model-stress",
-			Body: &types.LLMRequestBody{
-				Completions: &types.CompletionsRequest{
-					Prompt: prompt,
+			}
+
+			pods := []types.Pod{pod}
+			req := &types.LLMRequest{
+				RequestId:   uuid.NewString(),
+				TargetModel: "model-stress",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: prompt,
+					},
 				},
-			},
-		}
-
-		// First cycle: simulate scheduling and insert prefix info into the cache
-		plugin.Score(context.Background(), types.NewCycleState(), req, pods)
-		schedulingResult := &types.SchedulingResult{
-			PrimaryProfileName: "default",
-			ProfileResults: map[string]*types.ProfileRunResult{
-				"default": {TargetPods: []types.Pod{pod}},
-			},
-		}
-		plugin.PreRequest(context.Background(), req, schedulingResult, 0)
-		plugin.wg.Wait()
-
-		// Second cycle: validate internal state
-		state, err := plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req.RequestId, plugins.StateKey(plugin.TypedName().String()))
-		assert.NoError(b, err)
-		expectedHashes := int(math.Min(float64(maxPrefixBlocks), float64(len(req.Body.Completions.Prompt)/blockSize)))
-		assert.Equal(b, expectedHashes, len(state.PrefixHashes), "number of hashes is incorrect")
+			}
+
+			b.ResetTimer()
+			// Benchmark the scoring operation
+			scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods)
+			_ = scores // Use the result to prevent optimization
+
+			// Clean up state for next iteration
+			plugin.pluginState.Delete(req.RequestId)
+		})
+
 	}
 }
 
@@ -422,7 +416,6 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
 		MaxPrefixBlocksToMatch: maxPrefixBlocks,
 		LRUCapacityPerServer:   DefaultLRUCapacityPerServer,
 	}
-
 	plugin := New(context.Background(), config)
 
 	// Test scenarios: varying number of messages and message lengths
@@ -476,7 +469,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
 			b.ResetTimer()
 			for i := 0; i < b.N; i++ {
 				// Benchmark the scoring operation
-				scores := plugin.Score(context.Background(), nil, req, pods)
+				scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods)
 				_ = scores // Use the result to prevent optimization
 
 				// Clean up state for next iteration