@@ -328,15 +328,11 @@ def test_kv_stack_field_coverage(
328328 },
329329 {"key" : "llm.load.llama.evalBatchSize" , "value" : 42 },
330330 {"key" : "llm.load.llama.flashAttention" , "value" : False },
331- {"key" : "llm.load.llama.keepModelInMemory" , "value" : True },
332331 {
333- "key" : "llm.load.llama.llamaKCacheQuantizationType " ,
332+ "key" : "llm.load.llama.kCacheQuantizationType " ,
334333 "value" : {"checked" : True , "value" : "q8_0" },
335334 },
336- {
337- "key" : "llm.load.llama.llamaVCacheQuantizationType" ,
338- "value" : {"checked" : True , "value" : "f32" },
339- },
335+ {"key" : "llm.load.llama.keepModelInMemory" , "value" : True },
340336 {
341337 "key" : "llm.load.llama.ropeFrequencyBase" ,
342338 "value" : {"checked" : True , "value" : 10.0 },
@@ -347,6 +343,10 @@ def test_kv_stack_field_coverage(
347343 },
348344 {"key" : "llm.load.llama.tryMmap" , "value" : False },
349345 {"key" : "llm.load.llama.useFp16ForKVCache" , "value" : True },
346+ {
347+ "key" : "llm.load.llama.vCacheQuantizationType" ,
348+ "value" : {"checked" : True , "value" : "f32" },
349+ },
350350 {"key" : "llm.load.numExperts" , "value" : 0 },
351351 {"key" : "llm.load.offloadKVCacheToGpu" , "value" : False },
352352 {"key" : "llm.load.seed" , "value" : {"checked" : True , "value" : 313 }},
0 commit comments