From 879dcdd20b5ab78848146af6c2c76a8faf456a8c Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Fri, 26 Sep 2025 22:19:30 +0800 Subject: [PATCH 1/2] Fix prefix-cache-scorer benchmark panic --- Makefile | 4 + .../plugins/multi/prefix/plugin_test.go | 73 +++++++++---------- 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/Makefile b/Makefile index 66b9a16e2..88e98c402 100644 --- a/Makefile +++ b/Makefile @@ -142,6 +142,10 @@ test-unit: ## Run unit tests. go tool cover -func=cover.out; \ rm cover.out +.PHONY: test-benchmark +test-benchmark: ## Run benchmarks. + CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./pkg/... -bench=. -benchmem; + .PHONY: test-integration test-integration: envtest ## Run integration tests. CGO_ENABLED=1 KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration/epp/... -race -coverprofile cover.out diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go index 54a00abc1..3cb777956 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go +++ b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go @@ -19,7 +19,6 @@ package prefix import ( "context" "fmt" - "math" "math/rand" "strings" "testing" @@ -357,49 +356,44 @@ func BenchmarkPrefixPluginStress(b *testing.B) { plugin := New(context.Background(), config) types.NewCycleState() var promptLen []int - for i := 1; i <= 1024; i++ { + for i := 1; i <= 1024; { promptLen = append(promptLen, i) + i += 10 } promptLen = append(promptLen, 2048, 4096, 8192, 10000, 20000, 50000) - for _, i := range promptLen { - // Generate increasing-length random prompts - prompt := randomPrompt(4 + i) - pod := &types.PodMetrics{ - Pod: &backend.Pod{ - NamespacedName: k8stypes.NamespacedName{ - Name: fmt.Sprintf("random-pod-%d", i), + for i, v := range 
promptLen { + b.Run(fmt.Sprintf("messages_%d_length_%d", i, v), func(b *testing.B) { + // Generate increasing-length random prompts + prompt := randomPrompt(4 + v) + pod := &types.PodMetrics{ + Pod: &backend.Pod{ + NamespacedName: k8stypes.NamespacedName{ + Name: fmt.Sprintf("random-pod-%d", v), + }, }, - }, - } - - pods := []types.Pod{pod} - req := &types.LLMRequest{ - RequestId: uuid.NewString(), - TargetModel: "model-stress", - Body: &types.LLMRequestBody{ - Completions: &types.CompletionsRequest{ - Prompt: prompt, + } + + pods := []types.Pod{pod} + req := &types.LLMRequest{ + RequestId: uuid.NewString(), + TargetModel: "model-stress", + Body: &types.LLMRequestBody{ + Completions: &types.CompletionsRequest{ + Prompt: prompt, + }, }, - }, - } - - // First cycle: simulate scheduling and insert prefix info into the cache - plugin.Score(context.Background(), types.NewCycleState(), req, pods) - schedulingResult := &types.SchedulingResult{ - PrimaryProfileName: "default", - ProfileResults: map[string]*types.ProfileRunResult{ - "default": {TargetPods: []types.Pod{pod}}, - }, - } - plugin.PreRequest(context.Background(), req, schedulingResult, 0) - plugin.wg.Wait() - - // Second cycle: validate internal state - state, err := plugins.ReadPluginStateKey[*SchedulingContextState](plugin.pluginState, req.RequestId, plugins.StateKey(plugin.TypedName().String())) - assert.NoError(b, err) - expectedHashes := int(math.Min(float64(maxPrefixBlocks), float64(len(req.Body.Completions.Prompt)/blockSize))) - assert.Equal(b, expectedHashes, len(state.PrefixHashes), "number of hashes is incorrect") + } + + b.ResetTimer() + // Benchmark the scoring operation + scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods) + _ = scores // Use the result to prevent optimization + + // Clean up state for next iteration + plugin.pluginState.Delete(req.RequestId) + }) + } } @@ -424,6 +418,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) { } plugin := 
New(context.Background(), config) + //cycleState := types.NewCycleState() // Test scenarios: varying number of messages and message lengths scenarios := []struct { @@ -476,7 +471,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { // Benchmark the scoring operation - scores := plugin.Score(context.Background(), nil, req, pods) + scores := plugin.Score(context.Background(), types.NewCycleState(), req, pods) _ = scores // Use the result to prevent optimization // Clean up state for next iteration From 59210d737a091f0f757457f89adc21910c6572dd Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Mon, 29 Sep 2025 13:55:11 +0800 Subject: [PATCH 2/2] Fix lint: drop commented-out cycleState line in prefix plugin benchmark --- .../scheduling/framework/plugins/multi/prefix/plugin_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go index 3cb777956..7d4e1a487 100644 --- a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go +++ b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go @@ -416,9 +416,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) { MaxPrefixBlocksToMatch: maxPrefixBlocks, LRUCapacityPerServer: DefaultLRUCapacityPerServer, } - plugin := New(context.Background(), config) - //cycleState := types.NewCycleState() // Test scenarios: varying number of messages and message lengths scenarios := []struct {