diff --git a/src/lmstudio/_kv_config.py b/src/lmstudio/_kv_config.py
index 04036c2..75aa8ba 100644
--- a/src/lmstudio/_kv_config.py
+++ b/src/lmstudio/_kv_config.py
@@ -3,13 +3,32 @@
 # Known KV config settings are defined in
 # https://github.com/lmstudio-ai/lmstudio-js/blob/main/packages/lms-kv-config/src/schema.ts
 from dataclasses import dataclass
-from typing import Any, Container, Iterable, Sequence, Type, TypeAlias, TypeVar, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Container,
+    Iterable,
+    Sequence,
+    Type,
+    TypeAlias,
+    TypeVar,
+    cast,
+    get_args,
+)
+from typing_extensions import (
+    # Native in 3.11+
+    assert_never,
+)
 
 from .sdk_api import LMStudioValueError
 from .schemas import DictObject, DictSchema, ModelSchema, MutableDictObject
 from ._sdk_models import (
     EmbeddingLoadModelConfig,
     EmbeddingLoadModelConfigDict,
+    GpuSettingDict,
+    GpuSplitConfig,
+    GpuSplitConfigDict,
     KvConfig,
     KvConfigFieldDict,
     KvConfigStack,
@@ -18,6 +37,7 @@
     LlmLoadModelConfigDict,
     LlmPredictionConfig,
     LlmPredictionConfigDict,
+    LlmSplitStrategy,
     LlmStructuredPredictionSetting,
     LlmStructuredPredictionSettingDict,
 )
@@ -54,7 +74,7 @@ def to_kv_field(
     def update_client_config(
         self, client_config: MutableDictObject, value: DictObject
     ) -> None:
-        if value.get("key", False):
+        if value.get("checked", False):
             client_config[self.client_key] = value["value"]
 
 
@@ -84,26 +104,24 @@ def update_client_config(
 @dataclass(frozen=True)
 class MultiPartField(ConfigField):
     nested_keys: tuple[str, ...]
+    client_to_server: Callable[..., Any]
+    server_to_client: Callable[[DictObject, MutableDictObject], None]
 
     def to_kv_field(
         self, server_key: str, client_config: DictObject
     ) -> KvConfigFieldDict | None:
-        containing_value = client_config[self.client_key]
-        value: dict[str, Any] = {}
-        for key in self.nested_keys:
-            value[key] = containing_value[key]
+        client_container: DictObject = client_config[self.client_key]
+        values = (client_container.get(key, None) for key in self.nested_keys)
         return {
             "key": server_key,
-            "value": value,
+            "value": self.client_to_server(*values),
         }
 
     def update_client_config(
-        self, client_config: MutableDictObject, value: DictObject
+        self, client_config: MutableDictObject, server_value: DictObject
     ) -> None:
-        containing_value = client_config.setdefault(self.client_key, {})
-        for key in self.nested_keys:
-            if key in value:
-                containing_value[key] = value[key]
+        client_container: MutableDictObject = client_config.setdefault(self.client_key, {})
+        self.server_to_client(server_value, client_container)
 
 
 # TODO: figure out a way to compare this module against the lmstudio-js mappings
@@ -125,10 +143,68 @@
     "contextLength": ConfigField("contextLength"),
 }
 
+
+def _gpu_settings_to_gpu_split_config(
+    main_gpu: int | None,
+    llm_split_strategy: LlmSplitStrategy | None,
+    disabled_gpus: Sequence[int] | None,
+) -> GpuSplitConfigDict:
+    gpu_split_config: GpuSplitConfigDict = {
+        "disabledGpus": [*disabled_gpus] if disabled_gpus else [],
+        "strategy": "evenly",
+        "priority": [],
+        "customRatio": [],
+    }
+    match llm_split_strategy:
+        case "evenly" | None:
+            pass
+        case "favorMainGpu":
+            gpu_split_config["strategy"] = "priorityOrder"
+            if main_gpu is not None:
+                gpu_split_config["priority"] = [main_gpu]
+        case _:
+            if TYPE_CHECKING:
+                assert_never(llm_split_strategy)
+            err_msg = f"Unknown LLM GPU offload split strategy: {llm_split_strategy}"
+            hint = f"Known strategies: {get_args(LlmSplitStrategy)}"
+            raise LMStudioValueError(f"{err_msg} ({hint})")
+    return gpu_split_config
+
+
+def _gpu_split_config_to_gpu_settings(
+    server_dict: DictObject, client_dict: MutableDictObject
+) -> None:
+    gpu_settings_dict: GpuSettingDict = cast(GpuSettingDict, client_dict)
+    gpu_split_config = GpuSplitConfig._from_any_api_dict(server_dict)
+    disabled_gpus = gpu_split_config.disabled_gpus
+    if disabled_gpus is not None:
+        gpu_settings_dict["disabledGpus"] = disabled_gpus
+    match gpu_split_config.strategy:
+        case "evenly":
+            gpu_settings_dict["splitStrategy"] = "evenly"
+        case "priorityOrder":
+            # For now, this can only map to "favorMainGpu"
+            # Skip reporting the GPU offload details otherwise
+            priority = gpu_split_config.priority
+            if priority is not None and len(priority) == 1:
+                gpu_settings_dict["splitStrategy"] = "favorMainGpu"
+                gpu_settings_dict["mainGpu"] = priority[0]
+        case "custom":
+            # Currently no way to set up or report custom offload settings
+            pass
+        case _:
+            if TYPE_CHECKING:
+                assert_never(gpu_split_config.strategy)
+            # Simply don't report details for unknown server strategies
+
+
 SUPPORTED_SERVER_KEYS: dict[str, DictObject] = {
     "load": {
         "gpuSplitConfig": MultiPartField(
-            "gpu", ("mainGpu", "splitStrategy", "disabledGpus")
+            "gpu",
+            ("mainGpu", "splitStrategy", "disabledGpus"),
+            _gpu_settings_to_gpu_split_config,
+            _gpu_split_config_to_gpu_settings,
         ),
         "gpuStrictVramCap": ConfigField("gpuStrictVramCap"),
     },
diff --git a/tests/test_kv_config.py b/tests/test_kv_config.py
index 24abace..213e736 100644
--- a/tests/test_kv_config.py
+++ b/tests/test_kv_config.py
@@ -1,6 +1,7 @@
 """Test translation from flat dict configs to KvConfig layer stacks."""
 
-from typing import Any
+from copy import deepcopy
+from typing import Any, Iterator, cast, get_args
 
 import msgspec
@@ -14,6 +15,7 @@
     TO_SERVER_LOAD_LLM,
     TO_SERVER_PREDICTION,
     load_config_to_kv_config_stack,
+    parse_server_config,
     prediction_config_to_kv_config_stack,
 )
 from lmstudio._sdk_models import (
@@ -21,10 +23,13 @@
     EmbeddingLoadModelConfigDict,
     GpuSetting,
     GpuSettingDict,
+    GpuSplitConfigDict,
+    KvConfigStackDict,
     LlmLoadModelConfig,
     LlmLoadModelConfigDict,
     LlmPredictionConfig,
     LlmPredictionConfigDict,
+    LlmSplitStrategy,
 )
 
 # Note: configurations below are just for data manipulation round-trip testing,
@@ -262,7 +267,7 @@ def test_kv_stack_field_coverage(
     assert not unknown_keys
 
 
-EXPECTED_KV_STACK_LOAD_EMBEDDING = {
+EXPECTED_KV_STACK_LOAD_EMBEDDING: KvConfigStackDict = {
     "layers": [
         {
             "config": {
@@ -275,9 +280,10 @@
                     {
                         "key": "load.gpuSplitConfig",
                         "value": {
-                            "mainGpu": 0,
-                            "splitStrategy": "evenly",
                             "disabledGpus": [1, 2],
+                            "strategy": "evenly",
+                            "priority": [],
+                            "customRatio": [],
                         },
                     },
                     {"key": "embedding.load.llama.keepModelInMemory", "value": True},
@@ -297,7 +303,7 @@
     ],
 }
 
-EXPECTED_KV_STACK_LOAD_LLM = {
+EXPECTED_KV_STACK_LOAD_LLM: KvConfigStackDict = {
     "layers": [
         {
             "layerName": "apiOverride",
@@ -308,9 +314,10 @@
                     {
                         "key": "load.gpuSplitConfig",
                         "value": {
-                            "mainGpu": 0,
-                            "splitStrategy": "evenly",
                             "disabledGpus": [1, 2],
+                            "strategy": "evenly",
+                            "priority": [],
+                            "customRatio": [],
                         },
                     },
                     {"key": "llm.load.llama.evalBatchSize", "value": 42},
@@ -343,7 +350,7 @@
     ]
 }
 
-EXPECTED_KV_STACK_PREDICTION = {
+EXPECTED_KV_STACK_PREDICTION: KvConfigStackDict = {
     "layers": [
         {
             "config": {
@@ -438,6 +445,64 @@ def test_kv_stack_load_config_llm(config_dict: DictObject) -> None:
     assert kv_stack.to_dict() == EXPECTED_KV_STACK_LOAD_LLM
 
 
+def test_parse_server_config_load_embedding() -> None:
+    server_config = EXPECTED_KV_STACK_LOAD_EMBEDDING["layers"][0]["config"]
+    expected_client_config = deepcopy(LOAD_CONFIG_EMBEDDING)
+    gpu_settings_dict = expected_client_config["gpu"]
+    assert gpu_settings_dict is not None
+    del gpu_settings_dict["mainGpu"]  # This is not reported with "evenly" strategy
+    assert parse_server_config(server_config) == expected_client_config
+
+
+def test_parse_server_config_load_llm() -> None:
+    server_config = EXPECTED_KV_STACK_LOAD_LLM["layers"][0]["config"]
+    expected_client_config = deepcopy(LOAD_CONFIG_LLM)
+    gpu_settings_dict = expected_client_config["gpu"]
+    assert gpu_settings_dict is not None
+    del gpu_settings_dict["mainGpu"]  # This is not reported with "evenly" strategy
+    assert parse_server_config(server_config) == expected_client_config
+
+
+def _other_gpu_split_strategies() -> Iterator[LlmSplitStrategy]:
+    # Ensure all GPU split strategies are checked (these aren't simple structural
+    # transforms, so the default test case doesn't provide adequate test coverage)
+    for split_strategy in get_args(LlmSplitStrategy):
+        if split_strategy == GPU_CONFIG["splitStrategy"]:
+            continue
+        yield split_strategy
+
+
+def _find_config_field(stack_dict: KvConfigStackDict, key: str) -> Any:
+    for field in stack_dict["layers"][0]["config"]["fields"]:
+        if field["key"] == key:
+            return field["value"]
+    raise KeyError(key)
+
+
+@pytest.mark.parametrize("split_strategy", _other_gpu_split_strategies())
+def test_other_gpu_split_strategy_config(split_strategy: LlmSplitStrategy) -> None:
+    expected_stack = deepcopy(EXPECTED_KV_STACK_LOAD_LLM)
+    if split_strategy == "favorMainGpu":
+        expected_split_config: GpuSplitConfigDict = _find_config_field(
+            expected_stack, "load.gpuSplitConfig"
+        )
+        expected_split_config["strategy"] = "priorityOrder"
+        main_gpu = GPU_CONFIG["mainGpu"]
+        assert main_gpu is not None
+        expected_split_config["priority"] = [main_gpu]
+    else:
+        assert split_strategy is None, "Unknown LLM GPU offload split strategy"
+    input_camelCase = deepcopy(LOAD_CONFIG_LLM)
+    input_snake_case = deepcopy(SC_LOAD_CONFIG_LLM)
+    gpu_camelCase: GpuSettingDict = cast(Any, input_camelCase["gpu"])
+    gpu_snake_case: dict[str, Any] = cast(Any, input_snake_case["gpu"])
+    gpu_camelCase["splitStrategy"] = gpu_snake_case["split_strategy"] = split_strategy
+    kv_stack = load_config_to_kv_config_stack(input_camelCase, LlmLoadModelConfig)
+    assert kv_stack.to_dict() == expected_stack
+    kv_stack = load_config_to_kv_config_stack(input_snake_case, LlmLoadModelConfig)
+    assert kv_stack.to_dict() == expected_stack
+
+
 @pytest.mark.parametrize("config_dict", (PREDICTION_CONFIG, SC_PREDICTION_CONFIG))
 def test_kv_stack_prediction_config(config_dict: DictObject) -> None:
     # MyPy complains here that it can't be sure the dict has all the right keys
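
Review notes (illustrative sketches, not part of the patch):

The client-to-server converter replaces the old field-by-field copy: the three
nested keys under "gpu" are read positionally (missing keys surface as None)
and mapped onto the server's GpuSplitConfig shape, with "mainGpu" deliberately
dropped for the "evenly" strategy. A minimal sketch of the expected behaviour,
assuming the private helper stays importable from lmstudio._kv_config and that
dict insertion order is preserved in the repr:

    >>> from lmstudio._kv_config import _gpu_settings_to_gpu_split_config
    >>> # "evenly" keeps the defaults; mainGpu is intentionally not encoded
    >>> _gpu_settings_to_gpu_split_config(0, "evenly", [1, 2])
    {'disabledGpus': [1, 2], 'strategy': 'evenly', 'priority': [], 'customRatio': []}
    >>> # "favorMainGpu" maps to "priorityOrder" with the main GPU as sole entry
    >>> _gpu_settings_to_gpu_split_config(0, "favorMainGpu", None)
    {'disabledGpus': [], 'strategy': 'priorityOrder', 'priority': [0], 'customRatio': []}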
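
The server-to-client converter is deliberately lossy: "priorityOrder" is only
reported as "favorMainGpu" when exactly one GPU is prioritised, and "custom" or
unknown strategies are skipped rather than guessed at. A sketch of the reverse
mapping, assuming GpuSplitConfig._from_any_api_dict accepts a plain camelCase
dict like the one the server sends:

    >>> from lmstudio._kv_config import _gpu_split_config_to_gpu_settings
    >>> client_dict: dict = {}
    >>> _gpu_split_config_to_gpu_settings(
    ...     {"disabledGpus": [1, 2], "strategy": "priorityOrder",
    ...      "priority": [0], "customRatio": []},
    ...     client_dict,
    ... )
    >>> client_dict
    {'disabledGpus': [1, 2], 'splitStrategy': 'favorMainGpu', 'mainGpu': 0}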
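
The updated MultiPartField registration ties the two converters together, so
to_kv_field and update_client_config no longer assume the client and server
layouts match. A hedged sketch of the wiring (positional construction is
assumed to match the frozen dataclass field order shown in the patch):

    >>> from lmstudio._kv_config import MultiPartField
    >>> field = MultiPartField(
    ...     "gpu",
    ...     ("mainGpu", "splitStrategy", "disabledGpus"),
    ...     _gpu_settings_to_gpu_split_config,
    ...     _gpu_split_config_to_gpu_settings,
    ... )
    >>> # Missing nested keys become None, so partial client configs still map
    >>> field.to_kv_field("load.gpuSplitConfig", {"gpu": {"splitStrategy": "evenly"}})
    {'key': 'load.gpuSplitConfig', 'value': {'disabledGpus': [], 'strategy': 'evenly', 'priority': [], 'customRatio': []}}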