diff --git a/src/lmstudio/_kv_config.py b/src/lmstudio/_kv_config.py index e558531..589d1f5 100644 --- a/src/lmstudio/_kv_config.py +++ b/src/lmstudio/_kv_config.py @@ -77,21 +77,39 @@ def _gpu_offload_fields( offload_settings: DictObject, ) -> Sequence[KvConfigFieldDict]: fields: list[KvConfigFieldDict] = [] - gpu_keys = ( - ("ratio", f"{endpoint}.load.llama.acceleration.offloadRatio"), - ("mainGpu", "llama.load.mainGpu"), - ("splitStrategy", "llama.load.splitStrategy"), - ) - for key, mapped_key in gpu_keys: + remaining_keys = set(offload_settings.keys()) + simple_gpu_keys = (("ratio", f"{endpoint}.load.llama.acceleration.offloadRatio"),) + for key, mapped_key in simple_gpu_keys: if key in offload_settings: + remaining_keys.remove(key) fields.append({"key": mapped_key, "value": offload_settings[key]}) + split_config_keys = ("mainGpu", "splitStrategy", "disabledGpus") + split_config_settings: dict[str, Any] = {} + for key in split_config_keys: + if key in offload_settings: + remaining_keys.remove(key) + split_config_settings[key] = offload_settings[key] + if split_config_settings: + fields.append({"key": "load.gpuSplitConfig", "value": split_config_settings}) + if remaining_keys: + raise LMStudioValueError( + f"Unknown GPU offload settings: {sorted(remaining_keys)}" + ) return fields # Some fields have different names in the client and server configs +# (this map has also been used to avoid adding new key categories for new setting scopes) _CLIENT_TO_SERVER_KEYMAP = { "maxTokens": "maxPredictedTokens", "rawTools": "tools", + # "reasoning" scope + "reasoningParsing": "reasoning.parsing", + # "speculativeDecoding" scope + "draftModel": "speculativeDecoding.draftModel", + "speculativeDecodingNumDraftTokensExact": "speculativeDecoding.numDraftTokensExact", + "speculativeDecodingMinDraftLengthToConsider": "speculativeDecoding.minDraftLengthToConsider", + "speculativeDecodingMinContinueDraftingProbability": "speculativeDecoding.minContinueDraftingProbability", } @@ -99,6 +117,9 @@ def _to_server_key(key: str) -> str: return _CLIENT_TO_SERVER_KEYMAP.get(key, key) +_NOT_YET_SUPPORTED_KEYS: set[str] = set() + + def _to_kv_config_stack_base( config: DictObject, namespace: str, @@ -114,9 +135,12 @@ def _to_kv_config_stack_base( # TODO: Define a JSON or TOML data file for mapping prediction config # fields to config stack entries (preferably JSON exported by # lmstudio-js rather than something maintained in the Python SDK) + # https://github.com/lmstudio-ai/lmstudio-js/issues/253 + remaining_keys = set(config.keys() - _NOT_YET_SUPPORTED_KEYS) for client_key in checkbox_keys: if client_key in config: + remaining_keys.remove(client_key) server_key = _to_server_key(client_key) fields.append( _to_checkbox_kv( @@ -125,12 +149,14 @@ def _to_kv_config_stack_base( ) for client_key in simple_keys: if client_key in config: + remaining_keys.remove(client_key) server_key = _to_server_key(client_key) fields.append( _to_simple_kv(f"{namespace}.{request}", server_key, config[client_key]) ) for client_key in llama_keys: if client_key in config: + remaining_keys.remove(client_key) server_key = _to_server_key(client_key) fields.append( _to_simple_kv( @@ -139,6 +165,7 @@ def _to_kv_config_stack_base( ) for client_key in llama_checkbox_keys: if client_key in config: + remaining_keys.remove(client_key) server_key = _to_server_key(client_key) fields.append( _to_checkbox_kv( @@ -149,8 +176,12 @@ def _to_kv_config_stack_base( ) for gpu_offload_key in gpu_offload_keys: if gpu_offload_key in config: + remaining_keys.remove(gpu_offload_key) fields.extend(_gpu_offload_fields(namespace, config[gpu_offload_key])) + if remaining_keys: + raise LMStudioValueError(f"Unknown config settings: {sorted(remaining_keys)}") + return fields @@ -180,6 +211,7 @@ def _to_kv_config_stack_base( ], } + _EMBEDDING_LOAD_CONFIG_KEYS = { "checkbox_keys": [], "simple_keys": [ @@ -253,6 +285,11 @@ def load_config_to_kv_config_stack( "topKSampling", "toolCallStopStrings", "rawTools", + "reasoningParsing", + "draftModel", + "speculativeDecodingNumDraftTokensExact", + "speculativeDecodingMinDraftLengthToConsider", + "speculativeDecodingMinContinueDraftingProbability", ], "llama_keys": [ "cpuThreads", diff --git a/tests/test_kv_config.py b/tests/test_kv_config.py index f9f2a30..3198cdb 100644 --- a/tests/test_kv_config.py +++ b/tests/test_kv_config.py @@ -247,15 +247,7 @@ def test_snake_case_conversion( config_type._from_api_dict(input_dict) -_NOT_YET_SUPPORTED_KEYS = { - "disabledGpus", - "reasoningParsing", - # "speculativeDecoding" scope - "draftModel", - "speculativeDecodingNumDraftTokensExact", - "speculativeDecodingMinDraftLengthToConsider", - "speculativeDecodingMinContinueDraftingProbability", -} +_NOT_YET_SUPPORTED_KEYS: set[str] = set() @pytest.mark.parametrize("keymap_dict,config_type", zip(KEYMAP_DICTS, KEYMAP_TYPES)) @@ -292,8 +284,14 @@ def test_kv_stack_field_coverage( "key": "embedding.load.llama.acceleration.offloadRatio", "value": 0.5, }, - {"key": "llama.load.mainGpu", "value": 0}, - {"key": "llama.load.splitStrategy", "value": "evenly"}, + { + "key": "load.gpuSplitConfig", + "value": { + "mainGpu": 0, + "splitStrategy": "evenly", + "disabledGpus": [1, 2], + }, + }, ], }, "layerName": "apiOverride", @@ -332,8 +330,14 @@ def test_kv_stack_field_coverage( "value": {"checked": True, "value": "f32"}, }, {"key": "llm.load.llama.acceleration.offloadRatio", "value": 0.5}, - {"key": "llama.load.mainGpu", "value": 0}, - {"key": "llama.load.splitStrategy", "value": "evenly"}, + { + "key": "load.gpuSplitConfig", + "value": { + "mainGpu": 0, + "splitStrategy": "evenly", + "disabledGpus": [1, 2], + }, + }, ] }, } @@ -392,7 +396,27 @@ def test_kv_stack_field_coverage( "value": ["yellow"], }, {"key": "llm.prediction.tools", "value": {"type": "none"}}, - {"key": "llm.prediction.llama.cpuThreads", "value": 7.0}, + { + "key": "llm.prediction.reasoning.parsing", + "value": {"enabled": False, "startString": "", "endString": ""}, + }, + { + "key": "llm.prediction.speculativeDecoding.draftModel", + "value": "some-model-key", + }, + { + "key": "llm.prediction.speculativeDecoding.numDraftTokensExact", + "value": 2, + }, + { + "key": "llm.prediction.speculativeDecoding.minDraftLengthToConsider", + "value": 5, + }, + { + "key": "llm.prediction.speculativeDecoding.minContinueDraftingProbability", + "value": 0.1, + }, + {"key": "llm.prediction.llama.cpuThreads", "value": 7}, ], }, "layerName": "apiOverride",