Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 43 additions & 6 deletions src/lmstudio/_kv_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,28 +77,49 @@ def _gpu_offload_fields(
offload_settings: DictObject,
) -> Sequence[KvConfigFieldDict]:
fields: list[KvConfigFieldDict] = []
gpu_keys = (
("ratio", f"{endpoint}.load.llama.acceleration.offloadRatio"),
("mainGpu", "llama.load.mainGpu"),
("splitStrategy", "llama.load.splitStrategy"),
)
for key, mapped_key in gpu_keys:
remaining_keys = set(offload_settings.keys())
simple_gpu_keys = (("ratio", f"{endpoint}.load.llama.acceleration.offloadRatio"),)
for key, mapped_key in simple_gpu_keys:
if key in offload_settings:
remaining_keys.remove(key)
fields.append({"key": mapped_key, "value": offload_settings[key]})
split_config_keys = ("mainGpu", "splitStrategy", "disabledGpus")
split_config_settings: dict[str, Any] = {}
for key in split_config_keys:
if key in offload_settings:
remaining_keys.remove(key)
split_config_settings[key] = offload_settings[key]
if split_config_settings:
fields.append({"key": "load.gpuSplitConfig", "value": split_config_settings})
if remaining_keys:
raise LMStudioValueError(
f"Unknown GPU offload settings: {sorted(remaining_keys)}"
)
return fields


# Some fields have different names in the client and server configs
# (this map has also been used to avoid adding new key categories for new setting scopes)
# Maps a flat client-side field name to its (possibly dotted) server-side key.
# Keys not listed here pass through unchanged (see _to_server_key).
_CLIENT_TO_SERVER_KEYMAP = {
    "maxTokens": "maxPredictedTokens",
    "rawTools": "tools",
    # "reasoning" scope
    "reasoningParsing": "reasoning.parsing",
    # "speculativeDecoding" scope
    "draftModel": "speculativeDecoding.draftModel",
    "speculativeDecodingNumDraftTokensExact": "speculativeDecoding.numDraftTokensExact",
    "speculativeDecodingMinDraftLengthToConsider": "speculativeDecoding.minDraftLengthToConsider",
    "speculativeDecodingMinContinueDraftingProbability": "speculativeDecoding.minContinueDraftingProbability",
}


def _to_server_key(key: str) -> str:
    """Return the server-side name for *key*, or *key* unchanged if unmapped."""
    try:
        return _CLIENT_TO_SERVER_KEYMAP[key]
    except KeyError:
        return key


_NOT_YET_SUPPORTED_KEYS: set[str] = set()


def _to_kv_config_stack_base(
config: DictObject,
namespace: str,
Expand All @@ -114,9 +135,12 @@ def _to_kv_config_stack_base(
# TODO: Define a JSON or TOML data file for mapping prediction config
# fields to config stack entries (preferably JSON exported by
# lmstudio-js rather than something maintained in the Python SDK)
# https://github.com/lmstudio-ai/lmstudio-js/issues/253
remaining_keys = set(config.keys() - _NOT_YET_SUPPORTED_KEYS)

for client_key in checkbox_keys:
if client_key in config:
remaining_keys.remove(client_key)
server_key = _to_server_key(client_key)
fields.append(
_to_checkbox_kv(
Expand All @@ -125,12 +149,14 @@ def _to_kv_config_stack_base(
)
for client_key in simple_keys:
if client_key in config:
remaining_keys.remove(client_key)
server_key = _to_server_key(client_key)
fields.append(
_to_simple_kv(f"{namespace}.{request}", server_key, config[client_key])
)
for client_key in llama_keys:
if client_key in config:
remaining_keys.remove(client_key)
server_key = _to_server_key(client_key)
fields.append(
_to_simple_kv(
Expand All @@ -139,6 +165,7 @@ def _to_kv_config_stack_base(
)
for client_key in llama_checkbox_keys:
if client_key in config:
remaining_keys.remove(client_key)
server_key = _to_server_key(client_key)
fields.append(
_to_checkbox_kv(
Expand All @@ -149,8 +176,12 @@ def _to_kv_config_stack_base(
)
for gpu_offload_key in gpu_offload_keys:
if gpu_offload_key in config:
remaining_keys.remove(gpu_offload_key)
fields.extend(_gpu_offload_fields(namespace, config[gpu_offload_key]))

if remaining_keys:
raise LMStudioValueError(f"Unknown config settings: {sorted(remaining_keys)}")

return fields


Expand Down Expand Up @@ -180,6 +211,7 @@ def _to_kv_config_stack_base(
],
}


_EMBEDDING_LOAD_CONFIG_KEYS = {
"checkbox_keys": [],
"simple_keys": [
Expand Down Expand Up @@ -253,6 +285,11 @@ def load_config_to_kv_config_stack(
"topKSampling",
"toolCallStopStrings",
"rawTools",
"reasoningParsing",
"draftModel",
"speculativeDecodingNumDraftTokensExact",
"speculativeDecodingMinDraftLengthToConsider",
"speculativeDecodingMinContinueDraftingProbability",
],
"llama_keys": [
"cpuThreads",
Expand Down
52 changes: 38 additions & 14 deletions tests/test_kv_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,7 @@ def test_snake_case_conversion(
config_type._from_api_dict(input_dict)


_NOT_YET_SUPPORTED_KEYS = {
"disabledGpus",
"reasoningParsing",
# "speculativeDecoding" scope
"draftModel",
"speculativeDecodingNumDraftTokensExact",
"speculativeDecodingMinDraftLengthToConsider",
"speculativeDecodingMinContinueDraftingProbability",
}
_NOT_YET_SUPPORTED_KEYS: set[str] = set()


@pytest.mark.parametrize("keymap_dict,config_type", zip(KEYMAP_DICTS, KEYMAP_TYPES))
Expand Down Expand Up @@ -292,8 +284,14 @@ def test_kv_stack_field_coverage(
"key": "embedding.load.llama.acceleration.offloadRatio",
"value": 0.5,
},
{"key": "llama.load.mainGpu", "value": 0},
{"key": "llama.load.splitStrategy", "value": "evenly"},
{
"key": "load.gpuSplitConfig",
"value": {
"mainGpu": 0,
"splitStrategy": "evenly",
"disabledGpus": [1, 2],
},
},
],
},
"layerName": "apiOverride",
Expand Down Expand Up @@ -332,8 +330,14 @@ def test_kv_stack_field_coverage(
"value": {"checked": True, "value": "f32"},
},
{"key": "llm.load.llama.acceleration.offloadRatio", "value": 0.5},
{"key": "llama.load.mainGpu", "value": 0},
{"key": "llama.load.splitStrategy", "value": "evenly"},
{
"key": "load.gpuSplitConfig",
"value": {
"mainGpu": 0,
"splitStrategy": "evenly",
"disabledGpus": [1, 2],
},
},
]
},
}
Expand Down Expand Up @@ -392,7 +396,27 @@ def test_kv_stack_field_coverage(
"value": ["yellow"],
},
{"key": "llm.prediction.tools", "value": {"type": "none"}},
{"key": "llm.prediction.llama.cpuThreads", "value": 7.0},
{
"key": "llm.prediction.reasoning.parsing",
"value": {"enabled": False, "startString": "", "endString": ""},
},
{
"key": "llm.prediction.speculativeDecoding.draftModel",
"value": "some-model-key",
},
{
"key": "llm.prediction.speculativeDecoding.numDraftTokensExact",
"value": 2,
},
{
"key": "llm.prediction.speculativeDecoding.minDraftLengthToConsider",
"value": 5,
},
{
"key": "llm.prediction.speculativeDecoding.minContinueDraftingProbability",
"value": 0.1,
},
{"key": "llm.prediction.llama.cpuThreads", "value": 7},
],
},
"layerName": "apiOverride",
Expand Down
Loading