@@ -322,11 +322,10 @@ var _ = Describe("Simulator metrics", Ordered, func() {
322322			Expect (err ).NotTo (HaveOccurred ())
323323		})
324324		It ("Should send correct kv cache usage metrics" , func () {
325- 			modelName  :=  "Qwen/Qwen2-0.5B" 
326325			// Three requests; there should be two blocks in the kv cache, because
327326			// the first and the second prompts share a block.
328327			ctx  :=  context .TODO ()
329- 			args  :=  []string {"cmd" , "--model" , modelName , "--mode" , common .ModeRandom ,
328+ 			args  :=  []string {"cmd" , "--model" , qwenModelName , "--mode" , common .ModeRandom ,
330329				"--enable-kvcache" , "true" , "--kv-cache-size" , "16" , "--block-size" , "8" ,
331330				"--time-to-first-token" , "5000" , "--tokenizers-cache-dir" , tmpDir }
332331
@@ -342,19 +341,19 @@ var _ = Describe("Simulator metrics", Ordered, func() {
342341					Prompt : openai.CompletionNewParamsPromptUnion {
343342						OfString : openai .String ("What is the weather like in Haifa today? Is it cold?" ),
344343					},
345- 					Model : openai .CompletionNewParamsModel (modelName ),
344+ 					Model : openai .CompletionNewParamsModel (qwenModelName ),
346345				},
347346				{
348347					Prompt : openai.CompletionNewParamsPromptUnion {
349348						OfString : openai .String ("What is the weather like in Haifa today?" ),
350349					},
351- 					Model : openai .CompletionNewParamsModel (modelName ),
350+ 					Model : openai .CompletionNewParamsModel (qwenModelName ),
352351				},
353352				{
354353					Prompt : openai.CompletionNewParamsPromptUnion {
355354						OfString : openai .String ("What is the weather like in New York today?" ),
356355					},
357- 					Model : openai .CompletionNewParamsModel (modelName ),
356+ 					Model : openai .CompletionNewParamsModel (qwenModelName ),
358357				},
359358			}
360359
@@ -402,9 +401,8 @@ var _ = Describe("Simulator metrics", Ordered, func() {
402401		})
403402
404403		It ("Should send correct kv cache usage metrics for sequentual requests" , func () {
405- 			modelName  :=  "Qwen/Qwen2-0.5B" 
406404			ctx  :=  context .TODO ()
407- 			args  :=  []string {"cmd" , "--model" , modelName , "--mode" , common .ModeRandom ,
405+ 			args  :=  []string {"cmd" , "--model" , qwenModelName , "--mode" , common .ModeRandom ,
408406				"--enable-kvcache" , "true" , "--kv-cache-size" , "16" , "--block-size" , "8" ,
409407				"--time-to-first-token" , "5000" , "--tokenizers-cache-dir" , tmpDir , "--max-num-seqs" , "2" }
410408
@@ -420,19 +418,19 @@ var _ = Describe("Simulator metrics", Ordered, func() {
420418					Prompt : openai.CompletionNewParamsPromptUnion {
421419						OfString : openai .String ("What is the weather like in Haifa today? Is it cold?" ),
422420					},
423- 					Model : openai .CompletionNewParamsModel (modelName ),
421+ 					Model : openai .CompletionNewParamsModel (qwenModelName ),
424422				},
425423				{
426424					Prompt : openai.CompletionNewParamsPromptUnion {
427425						OfString : openai .String ("What is the weather like in Haifa today?" ),
428426					},
429- 					Model : openai .CompletionNewParamsModel (modelName ),
427+ 					Model : openai .CompletionNewParamsModel (qwenModelName ),
430428				},
431429				{
432430					Prompt : openai.CompletionNewParamsPromptUnion {
433431						OfString : openai .String ("What is the weather like in New York today?" ),
434432					},
435- 					Model : openai .CompletionNewParamsModel (modelName ),
433+ 					Model : openai .CompletionNewParamsModel (qwenModelName ),
436434				},
437435			}
438436
0 commit comments