102 changes: 89 additions & 13 deletions src/lmstudio/_kv_config.py
@@ -3,13 +3,32 @@
# Known KV config settings are defined in
# https://github.com/lmstudio-ai/lmstudio-js/blob/main/packages/lms-kv-config/src/schema.ts
from dataclasses import dataclass
from typing import Any, Container, Iterable, Sequence, Type, TypeAlias, TypeVar, cast
from typing import (
TYPE_CHECKING,
Any,
Callable,
Container,
Iterable,
Sequence,
Type,
TypeAlias,
TypeVar,
cast,
get_args,
)
from typing_extensions import (
# Native in 3.11+
assert_never,
)

from .sdk_api import LMStudioValueError
from .schemas import DictObject, DictSchema, ModelSchema, MutableDictObject
from ._sdk_models import (
EmbeddingLoadModelConfig,
EmbeddingLoadModelConfigDict,
GpuSettingDict,
GpuSplitConfig,
GpuSplitConfigDict,
KvConfig,
KvConfigFieldDict,
KvConfigStack,
@@ -18,6 +37,7 @@
LlmLoadModelConfigDict,
LlmPredictionConfig,
LlmPredictionConfigDict,
LlmSplitStrategy,
LlmStructuredPredictionSetting,
LlmStructuredPredictionSettingDict,
)
@@ -54,7 +74,7 @@ def to_kv_field(
def update_client_config(
self, client_config: MutableDictObject, value: DictObject
) -> None:
if value.get("key", False):
if value.get("checked", False):
client_config[self.client_key] = value["value"]


@@ -84,26 +104,24 @@ def update_client_config(
@dataclass(frozen=True)
class MultiPartField(ConfigField):
nested_keys: tuple[str, ...]
client_to_server: Callable[..., Any]
server_to_client: Callable[[DictObject, MutableDictObject], None]

def to_kv_field(
self, server_key: str, client_config: DictObject
) -> KvConfigFieldDict | None:
containing_value = client_config[self.client_key]
value: dict[str, Any] = {}
for key in self.nested_keys:
value[key] = containing_value[key]
client_container: DictObject = client_config[self.client_key]
values = (client_container.get(key, None) for key in self.nested_keys)
return {
"key": server_key,
"value": value,
"value": self.client_to_server(*values),
}

def update_client_config(
self, client_config: MutableDictObject, value: DictObject
self, client_config: MutableDictObject, server_value: DictObject
) -> None:
containing_value = client_config.setdefault(self.client_key, {})
for key in self.nested_keys:
if key in value:
containing_value[key] = value[key]
client_container: MutableDictObject = client_config.setdefault(self.client_key, {})
self.server_to_client(server_value, client_container)


# TODO: figure out a way to compare this module against the lmstudio-js mappings
@@ -125,10 +143,68 @@ def update_client_config(
"contextLength": ConfigField("contextLength"),
}


def _gpu_settings_to_gpu_split_config(
main_gpu: int | None,
llm_split_strategy: LlmSplitStrategy | None,
disabledGpus: Sequence[int] | None,
) -> GpuSplitConfigDict:
gpu_split_config: GpuSplitConfigDict = {
"disabledGpus": [*disabledGpus] if disabledGpus else [],
"strategy": "evenly",
"priority": [],
"customRatio": [],
}
match llm_split_strategy:
case "evenly" | None:
pass
case "favorMainGpu":
gpu_split_config["strategy"] = "priorityOrder"
if main_gpu is not None:
gpu_split_config["priority"] = [main_gpu]
case _:
if TYPE_CHECKING:
assert_never(llm_split_strategy)
err_msg = f"Unknown LLM GPU offload split strategy: {llm_split_strategy}"
hint = f"Known strategies: {get_args(LlmSplitStrategy)}"
raise LMStudioValueError(f"{err_msg} ({hint})")
return gpu_split_config


def _gpu_split_config_to_gpu_settings(
server_dict: DictObject, client_dict: MutableDictObject
) -> None:
gpu_settings_dict: GpuSettingDict = cast(GpuSettingDict, client_dict)
gpu_split_config = GpuSplitConfig._from_any_api_dict(server_dict)
disabled_gpus = gpu_split_config.disabled_gpus
if disabled_gpus is not None:
gpu_settings_dict["disabledGpus"] = disabled_gpus
match gpu_split_config.strategy:
case "evenly":
gpu_settings_dict["splitStrategy"] = "evenly"
case "priorityOrder":
# For now, this can only map to "favorMainGpu"
# Skip reporting the GPU offload details otherwise
priority = gpu_split_config.priority
if priority is not None and len(priority) == 1:
gpu_settings_dict["splitStrategy"] = "favorMainGpu"
gpu_settings_dict["mainGpu"] = priority[0]
case "custom":
# Currently no way to set up or report custom offload settings
pass
case _:
if TYPE_CHECKING:
assert_never(gpu_split_config.strategy)
# Simply don't report details for unknown server strategies


SUPPORTED_SERVER_KEYS: dict[str, DictObject] = {
"load": {
"gpuSplitConfig": MultiPartField(
"gpu", ("mainGpu", "splitStrategy", "disabledGpus")
"gpu",
("mainGpu", "splitStrategy", "disabledGpus"),
_gpu_settings_to_gpu_split_config,
_gpu_split_config_to_gpu_settings,
),
"gpuStrictVramCap": ConfigField("gpuStrictVramCap"),
},
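
The two converters registered above give a round trip between the client-side "gpu" settings dict and the server's "gpuSplitConfig" value. A minimal sketch of that round trip, assuming the private helpers can be imported from lmstudio._kv_config (they are module-internal, so this is illustration rather than supported API):

from lmstudio._kv_config import (
    _gpu_settings_to_gpu_split_config,
    _gpu_split_config_to_gpu_settings,
)

# Client values are passed positionally, in nested_keys order:
# ("mainGpu", "splitStrategy", "disabledGpus")
server_value = _gpu_settings_to_gpu_split_config(0, "favorMainGpu", [1, 2])
assert server_value == {
    "disabledGpus": [1, 2],
    "strategy": "priorityOrder",
    "priority": [0],
    "customRatio": [],
}

# Mapping back: "priorityOrder" with exactly one priority entry is the only
# server shape that currently maps onto "favorMainGpu"
client_value: dict = {}
_gpu_split_config_to_gpu_settings(server_value, client_value)
assert client_value == {
    "disabledGpus": [1, 2],
    "splitStrategy": "favorMainGpu",
    "mainGpu": 0,
}
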
81 changes: 73 additions & 8 deletions tests/test_kv_config.py
@@ -1,6 +1,7 @@
"""Test translation from flat dict configs to KvConfig layer stacks."""

from typing import Any
from copy import deepcopy
from typing import Any, Iterator, cast, get_args

import msgspec

@@ -14,17 +15,21 @@
TO_SERVER_LOAD_LLM,
TO_SERVER_PREDICTION,
load_config_to_kv_config_stack,
parse_server_config,
prediction_config_to_kv_config_stack,
)
from lmstudio._sdk_models import (
EmbeddingLoadModelConfig,
EmbeddingLoadModelConfigDict,
GpuSetting,
GpuSettingDict,
GpuSplitConfigDict,
KvConfigStackDict,
LlmLoadModelConfig,
LlmLoadModelConfigDict,
LlmPredictionConfig,
LlmPredictionConfigDict,
LlmSplitStrategy,
)

# Note: configurations below are just for data manipulation round-trip testing,
@@ -262,7 +267,7 @@ def test_kv_stack_field_coverage(
assert not unknown_keys


EXPECTED_KV_STACK_LOAD_EMBEDDING = {
EXPECTED_KV_STACK_LOAD_EMBEDDING: KvConfigStackDict = {
"layers": [
{
"config": {
@@ -275,9 +280,10 @@
{
"key": "load.gpuSplitConfig",
"value": {
"mainGpu": 0,
"splitStrategy": "evenly",
"disabledGpus": [1, 2],
"strategy": "evenly",
"priority": [],
"customRatio": [],
},
},
{"key": "embedding.load.llama.keepModelInMemory", "value": True},
@@ -297,7 +303,7 @@ def test_kv_stack_field_coverage(
],
}

EXPECTED_KV_STACK_LOAD_LLM = {
EXPECTED_KV_STACK_LOAD_LLM: KvConfigStackDict = {
"layers": [
{
"layerName": "apiOverride",
@@ -308,9 +314,10 @@
{
"key": "load.gpuSplitConfig",
"value": {
"mainGpu": 0,
"splitStrategy": "evenly",
"disabledGpus": [1, 2],
"strategy": "evenly",
"priority": [],
"customRatio": [],
},
},
{"key": "llm.load.llama.evalBatchSize", "value": 42},
@@ -343,7 +350,7 @@ def test_kv_stack_field_coverage(
]
}

EXPECTED_KV_STACK_PREDICTION = {
EXPECTED_KV_STACK_PREDICTION: KvConfigStackDict = {
"layers": [
{
"config": {
@@ -438,6 +445,64 @@ def test_kv_stack_load_config_llm(config_dict: DictObject) -> None:
assert kv_stack.to_dict() == EXPECTED_KV_STACK_LOAD_LLM


def test_parse_server_config_load_embedding() -> None:
server_config = EXPECTED_KV_STACK_LOAD_EMBEDDING["layers"][0]["config"]
expected_client_config = deepcopy(LOAD_CONFIG_EMBEDDING)
gpu_settings_dict = expected_client_config["gpu"]
assert gpu_settings_dict is not None
del gpu_settings_dict["mainGpu"] # This is not reported with "evenly" strategy
assert parse_server_config(server_config) == expected_client_config


def test_parse_server_config_load_llm() -> None:
server_config = EXPECTED_KV_STACK_LOAD_LLM["layers"][0]["config"]
expected_client_config = deepcopy(LOAD_CONFIG_LLM)
gpu_settings_dict = expected_client_config["gpu"]
assert gpu_settings_dict is not None
del gpu_settings_dict["mainGpu"] # This is not reported with "evenly" strategy
assert parse_server_config(server_config) == expected_client_config


def _other_gpu_split_strategies() -> Iterator[LlmSplitStrategy]:
# Ensure all GPU split strategies are checked (these aren't simple structural transforms,
# so the default test case doesn't provide adequate test coverage)
for split_strategy in get_args(LlmSplitStrategy):
if split_strategy == GPU_CONFIG["splitStrategy"]:
continue
yield split_strategy


def _find_config_field(stack_dict: KvConfigStackDict, key: str) -> Any:
for field in stack_dict["layers"][0]["config"]["fields"]:
if field["key"] == key:
return field["value"]
raise KeyError(key)


@pytest.mark.parametrize("split_strategy", _other_gpu_split_strategies())
def test_other_gpu_split_strategy_config(split_strategy: LlmSplitStrategy) -> None:
expected_stack = deepcopy(EXPECTED_KV_STACK_LOAD_LLM)
if split_strategy == "favorMainGpu":
expected_split_config: GpuSplitConfigDict = _find_config_field(
expected_stack, "load.gpuSplitConfig"
)
expected_split_config["strategy"] = "priorityOrder"
main_gpu = GPU_CONFIG["mainGpu"]
assert main_gpu is not None
expected_split_config["priority"] = [main_gpu]
else:
assert split_strategy is None, "Unknown LLM GPU offload split strategy"
input_camelCase = deepcopy(LOAD_CONFIG_LLM)
input_snake_case = deepcopy(SC_LOAD_CONFIG_LLM)
gpu_camelCase: GpuSettingDict = cast(Any, input_camelCase["gpu"])
gpu_snake_case: dict[str, Any] = cast(Any, input_snake_case["gpu"])
gpu_camelCase["splitStrategy"] = gpu_snake_case["split_strategy"] = split_strategy
kv_stack = load_config_to_kv_config_stack(input_camelCase, LlmLoadModelConfig)
assert kv_stack.to_dict() == expected_stack
kv_stack = load_config_to_kv_config_stack(input_snake_case, LlmLoadModelConfig)
assert kv_stack.to_dict() == expected_stack
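
# Illustration: for the "favorMainGpu" case above, the mutated expected field
# works out as follows (assuming the shared GPU fixtures use main GPU 0 and
# disabled GPUs [1, 2], as in the "evenly" expected stacks):
#
#     {
#         "key": "load.gpuSplitConfig",
#         "value": {
#             "disabledGpus": [1, 2],
#             "strategy": "priorityOrder",
#             "priority": [0],
#             "customRatio": [],
#         },
#     }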


@pytest.mark.parametrize("config_dict", (PREDICTION_CONFIG, SC_PREDICTION_CONFIG))
def test_kv_stack_prediction_config(config_dict: DictObject) -> None:
# MyPy complains here that it can't be sure the dict has all the right keys