@@ -35,7 +35,7 @@ import (
3535
3636func TestPrefixPluginCompletion (t * testing.T ) {
3737 config := Config {
38- DefaultBlockSize : 4 ,
38+ BlockSize : 4 ,
3939 MaxPrefixBlocksToMatch : DefaultMaxPrefixBlocks ,
4040 LRUCapacityPerServer : DefaultLRUCapacityPerServer ,
4141 }
@@ -201,7 +201,7 @@ func TestPrefixPluginCompletion(t *testing.T) {
201201
202202func TestPrefixPluginChatCompletions (t * testing.T ) {
203203 config := Config {
204- DefaultBlockSize : 4 ,
204+ BlockSize : 4 ,
205205 MaxPrefixBlocksToMatch : DefaultMaxPrefixBlocks ,
206206 LRUCapacityPerServer : DefaultLRUCapacityPerServer ,
207207 }
@@ -235,7 +235,7 @@ func TestPrefixPluginChatCompletions(t *testing.T) {
235235
236236func TestPrefixPluginChatCompletionsGrowth (t * testing.T ) {
237237 config := Config {
238- DefaultBlockSize : 8 , // Use larger block size for more predictable JSON marshaling
238+ BlockSize : 8 , // Use larger block size for more predictable JSON marshaling
239239 MaxPrefixBlocksToMatch : DefaultMaxPrefixBlocks ,
240240 LRUCapacityPerServer : DefaultLRUCapacityPerServer ,
241241 }
@@ -349,7 +349,7 @@ func BenchmarkPrefixPluginStress(b *testing.B) {
349349 blockSize := 4
350350 maxPrefixBlocks := 50000
351351 config := Config {
352- DefaultBlockSize : blockSize ,
352+ BlockSize : blockSize ,
353353 MaxPrefixBlocksToMatch : maxPrefixBlocks ,
354354 LRUCapacityPerServer : DefaultLRUCapacityPerServer ,
355355 }
@@ -409,7 +409,7 @@ func TestNew_InvalidConfigFallbacks(t *testing.T) {
409409 {
410410 name : "all zero" ,
411411 config : Config {
412- DefaultBlockSize : 0 ,
412+ BlockSize : 0 ,
413413 MaxPrefixBlocksToMatch : 0 ,
414414 LRUCapacityPerServer : 0 ,
415415 },
@@ -420,7 +420,7 @@ func TestNew_InvalidConfigFallbacks(t *testing.T) {
420420 {
421421 name : "negative values" ,
422422 config : Config {
423- DefaultBlockSize : - 5 ,
423+ BlockSize : - 5 ,
424424 MaxPrefixBlocksToMatch : - 10 ,
425425 LRUCapacityPerServer : - 100 ,
426426 },
@@ -431,7 +431,7 @@ func TestNew_InvalidConfigFallbacks(t *testing.T) {
431431 {
432432 name : "mixed valid and invalid" ,
433433 config : Config {
434- DefaultBlockSize : 32 , // valid
434+ BlockSize : 32 , // valid
435435 MaxPrefixBlocksToMatch : - 1 , // invalid
436436 LRUCapacityPerServer : 50000 , // valid
437437 },
@@ -442,7 +442,7 @@ func TestNew_InvalidConfigFallbacks(t *testing.T) {
442442 {
443443 name : "all valid" ,
444444 config : Config {
445- DefaultBlockSize : 64 ,
445+ BlockSize : 64 ,
446446 MaxPrefixBlocksToMatch : 200 ,
447447 LRUCapacityPerServer : 30000 ,
448448 },
@@ -459,13 +459,108 @@ func TestNew_InvalidConfigFallbacks(t *testing.T) {
459459
460460 assert .NotEmpty (t , plugin )
461461 assert .NotEmpty (t , plugin .indexer )
462- assert .Equal (t , tt .expectBlock , plugin .config .DefaultBlockSize )
462+ assert .Equal (t , tt .expectBlock , plugin .config .BlockSize )
463463 assert .Equal (t , tt .expectMaxMatch , plugin .config .MaxPrefixBlocksToMatch )
464464 assert .Equal (t , tt .expectCapacity , plugin .config .LRUCapacityPerServer )
465465 })
466466 }
467467}
468468
469+ func TestPrefixPluginAutoTune (t * testing.T ) {
470+ // Shared fixtures: one pod reporting cache metrics and one completion request, reused by both subtests.
471+ podName := "pod-autotune"
472+ pod := & types.PodMetrics {
473+ Pod : & backend.Pod {NamespacedName : k8stypes.NamespacedName {Name : podName }},
474+ MetricsState : & backendmetrics.MetricsState {
475+ CacheBlockSize : 16 , // 16 tokens * 4 chars/token = 64 chars per block
476+ CacheNumGPUBlocks : 1000 , // 1000 blocks capacity
477+ },
478+ }
479+ pods := []types.Pod {pod }
480+
481+ req := & types.LLMRequest {
482+ RequestId : uuid .NewString (),
483+ TargetModel : "test-model" ,
484+ Body : & types.LLMRequestBody {
485+ Completions : & types.CompletionsRequest {
486+ // The prompt is 128 chars long, so the expected hash count depends on the block size:
487+ // If AutoTune=true (block size 64): 2 blocks
488+ // If AutoTune=false (block size 32): 4 blocks
489+ Prompt : strings .Repeat ("a" , 128 ),
490+ },
491+ },
492+ }
493+
494+ t .Run ("AutoTune Enabled" , func (t * testing.T ) {
495+ config := Config {
496+ AutoTune : true ,
497+ BlockSize : 32 , // Should be ignored in favor of pod metrics (64)
498+ MaxPrefixBlocksToMatch : DefaultMaxPrefixBlocks ,
499+ // Should be ignored in favor of pod metrics (1000)
500+ LRUCapacityPerServer : 1 ,
501+ }
502+ plugin := New (context .Background (), config )
503+
504+ // 1. Verify Score uses pod metrics for block size
505+ scores := plugin .Score (context .Background (), types .NewCycleState (), req , pods )
506+ _ = scores // The scores themselves are not asserted; only the plugin state read below is checked.
507+
508+ state , err := plugins .ReadPluginStateKey [* SchedulingContextState ](plugin .pluginState , req .RequestId , plugins .StateKey (plugin .TypedName ().String ()))
509+ assert .NoError (t , err )
510+ // Block size from pod is 16 tokens * 4 = 64 chars.
511+ // Prompt is 128 chars.
512+ // Expected blocks: 128/64 = 2 hashes (model hash is used as seed but not returned as a block)
513+ assert .Equal (t , 2 , len (state .PrefixHashes ), "Should use pod block size (64 chars) -> 2 body blocks" )
514+
515+ // 2. Verify PreRequest uses pod metrics for capacity
516+ schedulingResult := & types.SchedulingResult {
517+ PrimaryProfileName : "default" ,
518+ ProfileResults : map [string ]* types.ProfileRunResult {
519+ "default" : {TargetPods : []types.Pod {pod }},
520+ },
521+ }
522+ plugin .PreRequest (context .Background (), req , schedulingResult )
523+ plugin .wg .Wait ()
524+
525+ // Check indexer state: the target pod must be present. NOTE(review): the capacity value itself is not asserted here.
526+ assert .Contains (t , plugin .indexer .Pods (), ServerID (pod .GetPod ().NamespacedName ))
527+ })
528+
529+ t .Run ("AutoTune Disabled" , func (t * testing.T ) {
530+ config := Config {
531+ AutoTune : false ,
532+ BlockSize : 32 , // Should be used (32 chars)
533+ MaxPrefixBlocksToMatch : DefaultMaxPrefixBlocks ,
534+ LRUCapacityPerServer : 1 , // Should be used, and the first hash should be evicted due to the small capacity
535+ }
536+ plugin := New (context .Background (), config )
537+
538+ // 1. Verify Score uses config BlockSize
539+ req .RequestId = uuid .NewString () // New request ID: req is shared with the previous subtest
540+ scores := plugin .Score (context .Background (), types .NewCycleState (), req , pods )
541+ _ = scores // The scores themselves are not asserted; only the plugin state read below is checked.
542+
543+ state , err := plugins .ReadPluginStateKey [* SchedulingContextState ](plugin .pluginState , req .RequestId , plugins .StateKey (plugin .TypedName ().String ()))
544+ assert .NoError (t , err )
545+ // Block size from config is 32 chars.
546+ // Prompt is 128 chars.
547+ // 128 / 32 = 4 chunks.
548+ assert .Equal (t , 4 , len (state .PrefixHashes ), "Should use config block size (32 chars) -> 4 body blocks" )
549+
550+ // 2. Run PreRequest with the config LRUCapacityPerServer in effect
551+ schedulingResult := & types.SchedulingResult {
552+ PrimaryProfileName : "default" ,
553+ ProfileResults : map [string ]* types.ProfileRunResult {
554+ "default" : {TargetPods : []types.Pod {pod }},
555+ },
556+ }
557+ plugin .PreRequest (context .Background (), req , schedulingResult )
558+ plugin .wg .Wait ()
559+
560+ assert .Contains (t , plugin .indexer .Pods (), ServerID (pod .GetPod ().NamespacedName )) // NOTE(review): only pod presence is asserted; eviction is not checked.
561+ })
562+ }
563+
469564// randomPrompt generates a pseudo-random string of length n using lowercase letters.
470565func randomPrompt (n int ) string {
471566 runes := []rune ("abcdefghijklmnopqrstuvwxyz" )
@@ -481,7 +576,7 @@ func BenchmarkPrefixPluginChatCompletionsStress(b *testing.B) {
481576 blockSize := 8
482577 maxPrefixBlocks := 50000
483578 config := Config {
484- DefaultBlockSize : blockSize ,
579+ BlockSize : blockSize ,
485580 MaxPrefixBlocksToMatch : maxPrefixBlocks ,
486581 LRUCapacityPerServer : DefaultLRUCapacityPerServer ,
487582 }
0 commit comments