
Commit eb8b8a5

Fix llama quantization config mapping (#111)
Closes #107
1 parent: 623bb1f

2 files changed: +8 additions, -8 deletions

src/lmstudio/_kv_config.py

Lines changed: 2 additions & 2 deletions
@@ -224,8 +224,8 @@ def _gpu_split_config_to_gpu_settings(
             **_COMMON_LLAMA_LOAD_KEYS,
             "evalBatchSize": ConfigField("evalBatchSize"),
             "flashAttention": ConfigField("flashAttention"),
-            "llamaKCacheQuantizationType": CheckboxField("llamaKCacheQuantizationType"),
-            "llamaVCacheQuantizationType": CheckboxField("llamaVCacheQuantizationType"),
+            "kCacheQuantizationType": CheckboxField("llamaKCacheQuantizationType"),
+            "vCacheQuantizationType": CheckboxField("llamaVCacheQuantizationType"),
             "useFp16ForKVCache": ConfigField("useFp16ForKVCache"),
         },
     },
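In this mapping, the dict keys are the client-facing config field names and the CheckboxField arguments are the server-side keys they translate to; the fix drops the redundant "llama" prefix from the client-facing names so that kCacheQuantizationType / vCacheQuantizationType are accepted as written. A minimal usage sketch follows (the lms.llm() call signature and the model key are assumptions for illustration, not part of this commit):

import lmstudio as lms

# Sketch only: assumes lms.llm() accepts a load-time config mapping of
# client-facing field names, and that this model identifier is available.
# The keys below are translated by _kv_config.py into the server-side
# "llamaKCacheQuantizationType" / "llamaVCacheQuantizationType" fields
# shown in the diff above.
model = lms.llm(
    "llama-3.2-1b-instruct",  # hypothetical model key
    config={
        "kCacheQuantizationType": "q8_0",
        "vCacheQuantizationType": "f32",
    },
)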

tests/test_kv_config.py

Lines changed: 6 additions & 6 deletions
@@ -328,15 +328,11 @@ def test_kv_stack_field_coverage(
         },
         {"key": "llm.load.llama.evalBatchSize", "value": 42},
         {"key": "llm.load.llama.flashAttention", "value": False},
-        {"key": "llm.load.llama.keepModelInMemory", "value": True},
         {
-            "key": "llm.load.llama.llamaKCacheQuantizationType",
+            "key": "llm.load.llama.kCacheQuantizationType",
             "value": {"checked": True, "value": "q8_0"},
         },
-        {
-            "key": "llm.load.llama.llamaVCacheQuantizationType",
-            "value": {"checked": True, "value": "f32"},
-        },
+        {"key": "llm.load.llama.keepModelInMemory", "value": True},
         {
             "key": "llm.load.llama.ropeFrequencyBase",
             "value": {"checked": True, "value": 10.0},
@@ -347,6 +343,10 @@ def test_kv_stack_field_coverage(
         },
         {"key": "llm.load.llama.tryMmap", "value": False},
         {"key": "llm.load.llama.useFp16ForKVCache", "value": True},
+        {
+            "key": "llm.load.llama.vCacheQuantizationType",
+            "value": {"checked": True, "value": "f32"},
+        },
         {"key": "llm.load.numExperts", "value": 0},
         {"key": "llm.load.offloadKVCacheToGpu", "value": False},
         {"key": "llm.load.seed", "value": {"checked": True, "value": 313}},
