11package scorer
22
33import (
4+ "fmt"
45 "os"
56 "path/filepath"
67 "testing"
@@ -21,12 +22,17 @@ import (
2122)
2223
2324func TestPrefixCacheTracking_Score (t * testing.T ) {
25+ const modelName = "test-model"
26+
2427 d , err := os .Getwd ()
2528 require .NoError (t , err )
2629 modelDir := filepath .Join (d , "testdata" )
2730 localTokenizerConfig := tokenization.LocalTokenizerConfig {
2831 ModelTokenizerMap : map [string ]string {
29- "test-model" : filepath .Join (modelDir , "test-model/tokenizer.json" ),
32+ modelName : filepath .Join (
33+ modelDir ,
34+ fmt .Sprintf ("%s/tokenizer.json" , modelName ), // trailing comma is required: a newline after ")" would otherwise insert a semicolon inside the argument list
35+ ),
3036 },
3137 }
3238
@@ -66,7 +72,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
6672 },
6773 request : & types.LLMRequest {
6874 RequestId : "test-request" ,
69- TargetModel : "test-model" ,
75+ TargetModel : modelName ,
7076 Body : nil ,
7177 },
7278 wantScoresByAddress : map [string ]float64 {}, // empty map
@@ -104,7 +110,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
104110 },
105111 request : & types.LLMRequest {
106112 RequestId : "test-request" ,
107- TargetModel : "test-model" ,
113+ TargetModel : modelName ,
108114 Body : & types.LLMRequestBody {
109115 Completions : & types.CompletionsRequest {
110116 Prompt : prompt ,
@@ -183,7 +189,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
183189 },
184190 request : & types.LLMRequest {
185191 RequestId : "test-request" ,
186- TargetModel : "test-model" ,
192+ TargetModel : modelName ,
187193 Body : & types.LLMRequestBody {
188194 ChatCompletions : & types.ChatCompletionsRequest {
189195 ChatTemplate : `{% for message in messages %}{{ message.role }}: {{ message.content }}
@@ -209,32 +215,22 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
209215 require .NotNil (t , req .ChatCompletions , "req expected to use ChatCompletions API" )
210216
211217 // convert to preprocessing format
212- var conversations []preprocessing.Conversation
213- for _ , msg := range req .ChatCompletions .Messages {
214- conversations = append ( conversations , preprocessing.Conversation {
218+ conversation := make ( []preprocessing.Conversation , len ( req . ChatCompletions . Messages ))
219+ for i , msg := range req .ChatCompletions .Messages {
220+ conversation [ i ] = preprocessing.Conversation {
215221 Role : msg .Role ,
216222 Content : msg .Content .Raw ,
217- })
223+ }
218224 }
219225
220- processor := preprocessing .NewChatTemplatingProcessor ()
221- tokenizerCacheKey , err := processor .GetOrCreateTokenizerKey (t .Context (), & preprocessing.GetOrCreateTokenizerKeyRequest {
222- IsLocal : true ,
223- Model : "testdata/" + model ,
224- })
226+ testTokenizer , err := tokenization .NewCachedLocalTokenizer (t .Context (), model , localTokenizerConfig )
225227 require .NoError (t , err )
226228
227- // render the chat template
228229 renderReq := & preprocessing.ApplyChatTemplateRequest {
229- Key : tokenizerCacheKey ,
230- Conversation : [][]preprocessing.Conversation {conversations },
230+ Conversation : [][]preprocessing.Conversation {conversation },
231231 ChatTemplate : req .ChatCompletions .ChatTemplate ,
232232 }
233- rendered , err := processor .ApplyChatTemplate (t .Context (), renderReq )
234- require .NoError (t , err )
235-
236- // tokenize rendered prompt
237- testTokenizer , err := tokenization .NewCachedLocalTokenizer (t .Context (), model , localTokenizerConfig )
233+ rendered , err := testTokenizer .ApplyChatTemplate (model , renderReq )
238234 require .NoError (t , err )
239235
240236 tokens , _ , err := testTokenizer .Encode (rendered , model , false )
@@ -294,7 +290,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
294290 },
295291 request : & types.LLMRequest {
296292 RequestId : "test-request" ,
297- TargetModel : "test-model" ,
293+ TargetModel : modelName ,
298294 Body : & types.LLMRequestBody {
299295 Completions : & types.CompletionsRequest {
300296 Prompt : prompt ,
@@ -363,7 +359,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
363359 },
364360 request : & types.LLMRequest {
365361 RequestId : "test-request" ,
366- TargetModel : "test-model" ,
362+ TargetModel : modelName ,
367363 Body : & types.LLMRequestBody {
368364 Completions : & types.CompletionsRequest {
369365 Prompt : prompt ,
@@ -423,7 +419,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
423419 },
424420 request : & types.LLMRequest {
425421 RequestId : "test-request" ,
426- TargetModel : "test-model" ,
422+ TargetModel : modelName ,
427423 Body : & types.LLMRequestBody {
428424 Completions : & types.CompletionsRequest {
429425 Prompt : "This prompt has never been cached before on any pod." ,
@@ -462,7 +458,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
462458 },
463459 request : & types.LLMRequest {
464460 RequestId : "test-request" ,
465- TargetModel : "test-model" ,
461+ TargetModel : modelName ,
466462 Body : & types.LLMRequestBody {
467463 Completions : & types.CompletionsRequest {
468464 Prompt : prompt ,
@@ -512,17 +508,18 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
512508 ctx := utils .NewTestContext (t )
513509
514510 kvcacheConfig , err := kvcache .NewDefaultConfig ()
511+ require .NoError (t , err )
515512 kvcacheConfig .TokenizersPoolConfig = & tokenization.Config {
516- ModelName : "test-model" ,
513+ ModelName : modelName ,
517514 WorkersCount : 1 ,
518515 MinPrefixOverlapRatio : 0.8 ,
519516 LocalTokenizerConfig : & localTokenizerConfig ,
520517 }
521- require .NoError (t , err )
522518
523519 prefixCacheScorer , err := New (ctx , PrecisePrefixCachePluginConfig {
524- IndexerConfig : kvcacheConfig ,
525- KVEventsConfig : kvevents .DefaultConfig (),
520+ IndexerConfig : kvcacheConfig ,
521+ KVEventsConfig : kvevents .DefaultConfig (),
522+ TokenProcessorConfig : kvblock .DefaultTokenProcessorConfig (),
526523 })
527524 require .NoError (t , err )
528525 require .NotNil (t , prefixCacheScorer )
0 commit comments