From 879dcdd20b5ab78848146af6c2c76a8faf456a8c Mon Sep 17 00:00:00 2001
From: Murphy Chen <minquan.chen@daocloud.io>
Date: Fri, 26 Sep 2025 22:19:30 +0800
Subject: [PATCH 1/2] Fix prefix-cache-scorer benchmark panic

---
 Makefile                                      |  4 +
 .../plugins/multi/prefix/plugin_test.go       | 73 +++++++++----------
 2 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/Makefile b/Makefile
index 66b9a16e2..88e98c402 100644
--- a/Makefile
+++ b/Makefile
@@ -142,6 +142,10 @@ test-unit: ## Run unit tests.
 	go tool cover -func=cover.out; \
 	rm cover.out
 
+.PHONY: test-benchmark
+test-benchmark: envtest ## Run benchmarks under ./pkg with memory allocation stats.
+	CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./pkg/... -bench=. -benchmem
+
 .PHONY: test-integration
 test-integration: envtest ## Run integration tests.
 	CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration/epp/... -race -coverprofile cover.out
diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go
index 54a00abc1..3cb777956 100644
--- a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go
+++ b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go
@@ -19,7 +19,6 @@ package prefix
 import (
 	"context"
 	"fmt"
-	"math"
 	"math/rand"
 	"strings"
 	"testing"
@@ -357,49 +356,44 @@ func BenchmarkPrefixPluginStress(b *testing.B) {
 	plugin := New(context.Background(), config)
 	types.NewCycleState()
 	var promptLen []int
-	for i := 1; i <= 1024; i++ {
+	for i := 1; i <= 1024; {
 		promptLen = append(promptLen, i)
+		i += 10
 	}
 	promptLen = append(promptLen, 2048, 4096, 8192, 10000, 20000, 50000)
 
-	for _, i := range promptLen {
-		// Generate increasing-length random prompts
-		prompt := randomPrompt(4 + i)
-		pod := &types.PodMetrics{
-			Pod: &backend.Pod{
-				NamespacedName: k8stypes.NamespacedName{
-					Name: fmt.Sprintf("random-pod-%d", i),
+	for i, v := range promptLen {
+		b.Run(fmt.Sprintf("messages_%d_length_%d", i, v), func(b *testing.B) {
+			// Generate increasing-length random prompts
+			prompt := randomPrompt(4 + v)
+			pod := &types.PodMetrics{
+				Pod: &backend.Pod{
+					NamespacedName: k8stypes.NamespacedName{
+						Name: fmt.Sprintf("random-pod-%d", v),
+					},
 				},
-			},
-		}
-
-		pods := []types.Pod{pod}
-		req := &types.LLMRequest{
-			RequestId:   uuid.NewString(),
-			TargetModel: "model-stress",
-			Body: &types.LLMRequestBody{
-				Completions: &types.CompletionsRequest{
-					Prompt: prompt,
+			}
+
+			pods := []types.Pod{pod}
+			req := &types.LLMRequest{
+				RequestId:   uuid.NewString(),
+				TargetModel: "model-stress",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: prompt,
+					},
 				},
-			},
-		}
-
-		// First cycle: simulate scheduling and insert prefix info into the cache
-		plugin.Score(context.Background(), types.NewCycleState(), req, pods)
-		schedulingResult := &types.SchedulingResult{
-			PrimaryProfileName: "default",
-			ProfileResults: map[string]*types.ProfileRunResult{
-				"default": {TargetPods: []types.Pod{pod}},
-			},
-		}
-		plugin.PreRequest(context.Background(), req, schedulingResult, 0)
-		plugin.wg.Wait()
-
-		// Second cycle: validate internal state
-		state, err := plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req.RequestId, plugins.StateKey(plugin.TypedName().String()))
-		assert.NoError(b, err)
-		expectedHashes := int(math.Min(float64(maxPrefixBlocks), float64(len(req.Body.Completions.Prompt)/blockSize)))
-		assert.Equal(b, expectedHashes, len(state.PrefixHashes), "number of hashes is incorrect")
+			}
+
+			b.ResetTimer()
+			// Benchmark the scoring operation
+			scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods)
+			_ = scores // Use the result to prevent optimization
+
+			// Clean up state for next iteration
+			plugin.pluginState.Delete(req.RequestId)
+		})
+
 	}
 }
 
@@ -424,6 +418,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
 	}
 
 	plugin := New(context.Background(), config)
+	//cycleState := types.NewCycleState()
 
 	// Test scenarios: varying number of messages and message lengths
 	scenarios := []struct {
@@ -476,7 +471,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
 			b.ResetTimer()
 			for i := 0; i < b.N; i++ {
 				// Benchmark the scoring operation
-				scores := plugin.Score(context.Background(), nil, req, pods)
+				scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods)
 				_ = scores // Use the result to prevent optimization
 
 				// Clean up state for next iteration

From 59210d737a091f0f757457f89adc21910c6572dd Mon Sep 17 00:00:00 2001
From: Murphy Chen <minquan.chen@daocloud.io>
Date: Mon, 29 Sep 2025 13:55:11 +0800
Subject: [PATCH 2/2] fix lint

---
 .../scheduling/framework/plugins/multi/prefix/plugin_test.go    | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go
index 3cb777956..7d4e1a487 100644
--- a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go
+++ b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go
@@ -416,9 +416,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
 		MaxPrefixBlocksToMatch: maxPrefixBlocks,
 		LRUCapacityPerServer:   DefaultLRUCapacityPerServer,
 	}
-
 	plugin := New(context.Background(), config)
-	//cycleState := types.NewCycleState()
 
 	// Test scenarios: varying number of messages and message lengths
 	scenarios := []struct {