3,864 changes: 3,383 additions & 481 deletions sdk-schema/lms-with-inferred-unions.json

Large diffs are not rendered by default.

3,172 changes: 2,803 additions & 369 deletions sdk-schema/lms.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion sdk-schema/lmstudio-js
Submodule lmstudio-js updated 121 files
7 changes: 6 additions & 1 deletion sdk-schema/sync-sdk-schema.py
@@ -22,6 +22,7 @@
import ast
import builtins
import json
import re
import shutil
import subprocess
import sys
@@ -410,11 +411,15 @@ def _generate_data_model_from_json_schema() -> None:
raise RuntimeError(f"Failed to create {_MODEL_PATH!r}")
# Generated source code post-processing:
#
# * Fix up miscellaneous issues the code generator currently mishandles
# * Fix up typed dicts to be defined in terms of nested dicts
# * Add an `__all__` definition for wildcard imports (which also
#   serves as a top level summary of the defined schemas)
print("Post-processing generated source code...")
model_source = _MODEL_PATH.read_text()
# Replace unsupported regex character classes with `.`
# https://github.com/python/cpython/issues/95555
# https://github.com/jcrist/msgspec/issues/860
model_source = re.sub(r"\\\\p\{[^}]*\}", ".", _MODEL_PATH.read_text())
model_ast = ast.parse(model_source)
dict_token_replacements: dict[str, str] = {}
exported_names: list[str] = []
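For `sdk-schema/sync-sdk-schema.py` above: the new `re.sub` post-processing step works around `\p{...}` Unicode property classes that Python's `re` module rejects (per the linked CPython and msgspec issues). A minimal standalone sketch of the same substitution, using a hypothetical fragment of generated source rather than the real `_MODEL_PATH` contents:

```python
import re

# Hypothetical line of generated model source. In the file text, the string
# literal spells the pattern with an escaped backslash, i.e. `\\p{L}`.
generated_source = r'user_name: Annotated[str, Meta(pattern="^\\p{L}+$")]'

# Same substitution as the sync script: rewrite each unsupported `\p{...}`
# character class as `.` so the pattern remains compilable by `re`.
cleaned_source = re.sub(r"\\\\p\{[^}]*\}", ".", generated_source)

print(cleaned_source)  # user_name: Annotated[str, Meta(pattern="^.+$")]
```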
5 changes: 4 additions & 1 deletion src/lmstudio/_kv_config.py
@@ -120,7 +120,9 @@ def to_kv_field(
def update_client_config(
self, client_config: MutableDictObject, server_value: DictObject
) -> None:
client_container: MutableDictObject = client_config.setdefault(self.client_key, {})
client_container: MutableDictObject = client_config.setdefault(
self.client_key, {}
)
self.server_to_client(server_value, client_container)


@@ -216,6 +218,7 @@ def _gpu_split_config_to_gpu_settings(
**_COMMON_MODEL_LOAD_KEYS,
"numExperts": ConfigField("numExperts"),
"seed": CheckboxField("seed"),
"offloadKVCacheToGpu": ConfigField("offloadKVCacheToGpu"),
"llama": {
**_COMMON_LLAMA_LOAD_KEYS,
"evalBatchSize": ConfigField("evalBatchSize"),
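For context on the reformatted `update_client_config` in `src/lmstudio/_kv_config.py` above: `dict.setdefault` creates the nested client-side container on first use and reuses it afterwards, so several server fields can land under one client key. A rough sketch of that pattern with plain dicts (the `NestedField` class here is an illustration, not the SDK's `ConfigField`):

```python
from typing import Any

MutableDictObject = dict[str, Any]
DictObject = dict[str, Any]


class NestedField:
    """Toy stand-in for a field that maps a server value into a nested client dict."""

    def __init__(self, client_key: str, value_key: str) -> None:
        self.client_key = client_key
        self.value_key = value_key

    def update_client_config(
        self, client_config: MutableDictObject, server_value: DictObject
    ) -> None:
        # setdefault returns the existing nested dict, or inserts and returns {}
        client_container: MutableDictObject = client_config.setdefault(
            self.client_key, {}
        )
        client_container[self.value_key] = server_value["value"]


config: MutableDictObject = {}
NestedField("llama", "evalBatchSize").update_client_config(config, {"value": 512})
NestedField("llama", "tryMmap").update_client_config(config, {"value": False})
print(config)  # {'llama': {'evalBatchSize': 512, 'tryMmap': False}}
```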
5,222 changes: 4,118 additions & 1,104 deletions src/lmstudio/_sdk_models/__init__.py

Large diffs are not rendered by default.

14 changes: 8 additions & 6 deletions src/lmstudio/json_api.py
@@ -700,6 +700,8 @@ def result(self) -> T:
return self._result

def raise_unknown_message_error(self, unknown_message: Any) -> NoReturn:
# TODO: improve forward compatibility by switching this to use warnings.warn
# instead of failing immediately for all unknown messages
raise LMStudioUnknownMessageError(
f"{self._NOTICE_PREFIX} unexpected message contents: {unknown_message!r}"
)
@@ -1234,20 +1236,20 @@ def iter_message_events(
# Ignore status updates after cancellation (avoids race condition)
return
yield from self._update_prompt_processing_progress(progress)
case {
"type": "toolCallGenerationStart",
}:
case {"type": "toolCallGenerationStart"}:
self._logger.debug("Notified of pending tool call request generation.")
case {"type": "toolCallGenerationNameReceived"}:
pass # UI event, currently ignored by Python SDK
case {"type": "toolCallGenerationArgumentFragmentGenerated"}:
pass # UI event, currently ignored by Python SDK
case {
"type": "toolCallGenerationEnd",
"toolCallRequest": tool_call_request,
}:
yield PredictionToolCallEvent(
ToolCallRequest._from_api_dict(tool_call_request)
)
case {
"type": "toolCallGenerationFailed",
}:
case {"type": "toolCallGenerationFailed"}:
self._logger.warn("Tool call processing generation failed.")
yield PredictionToolCallAbortedEvent(None)
case {"type": "error", "error": {} as error}:
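The new TODO in `raise_unknown_message_error` in `src/lmstudio/json_api.py` points at replacing the hard failure with a warning so older clients tolerate newer message types. One possible shape for that change, sketched outside the real class (the warning category and helper name are assumptions, not SDK API):

```python
import warnings
from typing import Any


class LMStudioUnknownMessageWarning(UserWarning):
    """Hypothetical warning category for unrecognised server messages."""


def warn_unknown_message(notice_prefix: str, unknown_message: Any) -> None:
    # Emit a warning instead of raising LMStudioUnknownMessageError, so a
    # newer server can add message types without breaking existing clients.
    warnings.warn(
        f"{notice_prefix} unexpected message contents: {unknown_message!r}",
        LMStudioUnknownMessageWarning,
        stacklevel=2,
    )
```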
3 changes: 2 additions & 1 deletion src/lmstudio/schemas.py
@@ -94,8 +94,9 @@ def model_json_schema(cls) -> DictSchema:


_CAMEL_CASE_OVERRIDES = {
# This is the one key in the API that capitalizes the `V` in `KV`
# `_kv_` in snake_case becomes KV in camelCase
"useFp16ForKvCache": "useFp16ForKVCache",
"offloadKvCacheToGpu": "offloadKVCacheToGpu",
}

_SKIP_FIELD_RECURSION = set(
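The extra `_CAMEL_CASE_OVERRIDES` entry in `src/lmstudio/schemas.py` above is needed because a mechanical snake_case to camelCase conversion yields `Kv` where the server API expects `KV`. A small sketch of how such an override map can be applied (the `_to_camel_case` helper here is illustrative, not the SDK's actual implementation):

```python
_CAMEL_CASE_OVERRIDES = {
    # `_kv_` in snake_case becomes KV in camelCase
    "useFp16ForKvCache": "useFp16ForKVCache",
    "offloadKvCacheToGpu": "offloadKVCacheToGpu",
}


def _to_camel_case(snake_name: str) -> str:
    # Mechanical conversion: offload_kv_cache_to_gpu -> offloadKvCacheToGpu
    first, *rest = snake_name.split("_")
    candidate = first + "".join(word.title() for word in rest)
    # Apply the overrides for keys where the API capitalises differently.
    return _CAMEL_CASE_OVERRIDES.get(candidate, candidate)


print(_to_camel_case("offload_kv_cache_to_gpu"))  # offloadKVCacheToGpu
print(_to_camel_case("rope_frequency_base"))      # ropeFrequencyBase
```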
2 changes: 1 addition & 1 deletion tests/test_inference.py
@@ -287,5 +287,5 @@ def _handle_invalid_request(
assert isinstance(tool_failure_exc.__cause__, ZeroDivisionError)
# If the content checks prove too flaky in practice, they can be dropped
completed_response = predictions[-1].content.lower()
assert "divid" in completed_response # Accepts both "divide" and "dividing"
assert "divid" in completed_response # Accepts both "divide" and "dividing"
assert "zero" in completed_response
10 changes: 8 additions & 2 deletions tests/test_kv_config.py
@@ -76,6 +76,7 @@
"llamaKCacheQuantizationType": "q8_0",
"llamaVCacheQuantizationType": "f32",
"numExperts": 0,
"offloadKVCacheToGpu": False,
"ropeFrequencyBase": 10.0,
"ropeFrequencyScale": 1.5,
"seed": 313,
@@ -93,6 +94,7 @@
"llama_k_cache_quantization_type": "q8_0",
"llama_v_cache_quantization_type": "f32",
"num_experts": 0,
"offload_kv_cache_to_gpu": False,
"rope_frequency_base": 10.0,
"rope_frequency_scale": 1.5,
"seed": 313,
@@ -221,6 +223,9 @@ class LlmPredictionConfigStrict(LlmPredictionConfig, forbid_unknown_fields=True)
LlmPredictionConfigStrict,
)

# The "raw" debugging field is a special case, with TBD handling
_NOT_YET_MAPPED = {"raw"}


@pytest.mark.parametrize("config_dict,config_type", zip(CONFIG_DICTS, CONFIG_TYPES))
def test_struct_field_coverage(
@@ -232,7 +237,7 @@ def test_struct_field_coverage(
missing_keys = expected_keys - mapped_keys
assert not missing_keys
# Ensure no extra keys are mistakenly defined
unknown_keys = mapped_keys - expected_keys
unknown_keys = mapped_keys - expected_keys - _NOT_YET_MAPPED
assert not unknown_keys
# Ensure the config can be loaded
config_struct = config_type._from_api_dict(config_dict)
@@ -260,7 +265,7 @@ def test_kv_stack_field_coverage(
# Ensure all expected keys are covered (even those with default values)
mapped_keys = keymap.keys()
expected_keys = set(config_type.__struct_encode_fields__)
missing_keys = expected_keys - mapped_keys
missing_keys = expected_keys - mapped_keys - _NOT_YET_MAPPED
assert not missing_keys
# Ensure no extra keys are mistakenly defined
unknown_keys = mapped_keys - expected_keys
@@ -342,6 +347,7 @@ def test_kv_stack_field_coverage(
{"key": "llm.load.llama.tryMmap", "value": False},
{"key": "llm.load.llama.useFp16ForKVCache", "value": True},
{"key": "llm.load.numExperts", "value": 0},
{"key": "llm.load.offloadKVCacheToGpu", "value": False},
{"key": "llm.load.seed", "value": {"checked": True, "value": 313}},
{"key": "load.gpuStrictVramCap", "value": False},
]
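The coverage tests in `tests/test_kv_config.py` above rely on set differences between a config type's declared fields and the key map, with `_NOT_YET_MAPPED` carving out the deferred `raw` field. A condensed illustration of that check with made-up key sets (the real ones come from `__struct_encode_fields__` and the keymap):

```python
# Made-up key sets standing in for the real struct fields and keymap keys.
_NOT_YET_MAPPED = {"raw"}
expected_keys = {"seed", "numExperts", "offloadKVCacheToGpu", "raw"}
mapped_keys = {"seed", "numExperts", "offloadKVCacheToGpu"}

# Every declared field must be mapped, except those deliberately deferred.
missing_keys = expected_keys - mapped_keys - _NOT_YET_MAPPED
assert not missing_keys

# No mapped key may be missing from the declared fields.
unknown_keys = mapped_keys - expected_keys
assert not unknown_keys
```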