diff --git a/sdk-schema/lms-with-inferred-unions.json b/sdk-schema/lms-with-inferred-unions.json
index 6a0c870..5497948 100644
--- a/sdk-schema/lms-with-inferred-unions.json
+++ b/sdk-schema/lms-with-inferred-unions.json
@@ -809,6 +809,12 @@
         },
         "splitStrategy": {
           "$ref": "#/definitions/llmSplitStrategy"
+        },
+        "disabledGpus": {
+          "type": "array",
+          "items": {
+            "type": "integer"
+          }
         }
       },
       "additionalProperties": false
@@ -3210,6 +3216,50 @@
       ],
       "additionalProperties": false
     },
+    "embedding/rpc/countTokens/parameter": {
+      "type": "object",
+      "properties": {
+        "specifier": {
+          "$ref": "#/definitions/modelSpecifier"
+        },
+        "inputString": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "specifier",
+        "inputString"
+      ],
+      "additionalProperties": false
+    },
+    "embedding/rpc/countTokens/returns": {
+      "type": "object",
+      "properties": {
+        "tokenCount": {
+          "type": "integer"
+        }
+      },
+      "required": [
+        "tokenCount"
+      ],
+      "additionalProperties": false
+    },
+    "pseudo/embedding/rpc/countTokens": {
+      "type": "object",
+      "properties": {
+        "parameter": {
+          "$ref": "#/definitions/embedding/rpc/countTokens/parameter"
+        },
+        "returns": {
+          "$ref": "#/definitions/embedding/rpc/countTokens/returns"
+        }
+      },
+      "required": [
+        "parameter",
+        "returns"
+      ],
+      "additionalProperties": false
+    },
     "embedding/channel/loadModel/creationParameter": {
       "type": "object",
       "properties": {
@@ -3390,6 +3440,9 @@
         "rpcTokenize": {
           "$ref": "#/definitions/pseudo/embedding/rpc/tokenize"
         },
+        "rpcCountTokens": {
+          "$ref": "#/definitions/pseudo/embedding/rpc/countTokens"
+        },
         "channelLoadModel": {
           "$ref": "#/definitions/pseudo/embedding/channel/loadModel"
         },
@@ -3404,6 +3457,7 @@
         "rpcGetLoadConfig",
         "rpcEmbedString",
         "rpcTokenize",
+        "rpcCountTokens",
         "channelLoadModel",
         "channelGetOrLoad"
       ],
@@ -6216,7 +6270,7 @@
             "title": "Type"
           },
           "info": {
-            "$ref": "#/definitions/embeddingModelInstanceInfo"
+            "$ref": "#/definitions/modelInstanceInfo"
           }
         },
         "required": [
@@ -6608,7 +6662,7 @@
             "title": "Type"
          },
           "info": {
-            "$ref": "#/definitions/llmInstanceInfo"
+            "$ref": "#/definitions/modelInstanceInfo"
           }
         },
         "required": [
diff --git a/sdk-schema/lms.json b/sdk-schema/lms.json
index de538a5..04a249e 100644
--- a/sdk-schema/lms.json
+++ b/sdk-schema/lms.json
@@ -1174,6 +1174,12 @@
         },
         "splitStrategy": {
           "$ref": "#/definitions/llmSplitStrategy"
+        },
+        "disabledGpus": {
+          "type": "array",
+          "items": {
+            "type": "integer"
+          }
         }
       },
       "additionalProperties": false
@@ -3689,6 +3695,50 @@
       ],
       "additionalProperties": false
     },
+    "embedding/rpc/countTokens/parameter": {
+      "type": "object",
+      "properties": {
+        "specifier": {
+          "$ref": "#/definitions/modelSpecifier"
+        },
+        "inputString": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "specifier",
+        "inputString"
+      ],
+      "additionalProperties": false
+    },
+    "embedding/rpc/countTokens/returns": {
+      "type": "object",
+      "properties": {
+        "tokenCount": {
+          "type": "integer"
+        }
+      },
+      "required": [
+        "tokenCount"
+      ],
+      "additionalProperties": false
+    },
+    "pseudo/embedding/rpc/countTokens": {
+      "type": "object",
+      "properties": {
+        "parameter": {
+          "$ref": "#/definitions/embedding/rpc/countTokens/parameter"
+        },
+        "returns": {
+          "$ref": "#/definitions/embedding/rpc/countTokens/returns"
+        }
+      },
+      "required": [
+        "parameter",
+        "returns"
+      ],
+      "additionalProperties": false
+    },
     "embedding/channel/loadModel/creationParameter": {
       "type": "object",
       "properties": {
@@ -3878,7 +3928,7 @@
             "const": "unloadingOtherJITModel"
           },
           "info": {
-            "$ref": "#/definitions/embeddingModelInstanceInfo"
+            "$ref": "#/definitions/modelInstanceInfo"
"#/definitions/modelInstanceInfo" } }, "required": [ @@ -3981,6 +4031,9 @@ "rpcTokenize": { "$ref": "#/definitions/pseudo/embedding/rpc/tokenize" }, + "rpcCountTokens": { + "$ref": "#/definitions/pseudo/embedding/rpc/countTokens" + }, "channelLoadModel": { "$ref": "#/definitions/pseudo/embedding/channel/loadModel" }, @@ -3995,6 +4048,7 @@ "rpcGetLoadConfig", "rpcEmbedString", "rpcTokenize", + "rpcCountTokens", "channelLoadModel", "channelGetOrLoad" ], @@ -4831,7 +4885,7 @@ "const": "unloadingOtherJITModel" }, "info": { - "$ref": "#/definitions/llmInstanceInfo" + "$ref": "#/definitions/modelInstanceInfo" } }, "required": [ diff --git a/sdk-schema/lmstudio-js b/sdk-schema/lmstudio-js index 12af436..7155368 160000 --- a/sdk-schema/lmstudio-js +++ b/sdk-schema/lmstudio-js @@ -1 +1 @@ -Subproject commit 12af436862cd393004c8f8668d29f3ed10a092c0 +Subproject commit 71553680bd102eaeb86c0cbc1a42eb1908b6ddda diff --git a/src/lmstudio/_sdk_models/__init__.py b/src/lmstudio/_sdk_models/__init__.py index b4b3ee6..aa235da 100644 --- a/src/lmstudio/_sdk_models/__init__.py +++ b/src/lmstudio/_sdk_models/__init__.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: lms-with-inferred-unions.json -# timestamp: 2025-02-27T03:57:00+00:00 +# timestamp: 2025-03-03T03:25:41+00:00 from __future__ import annotations @@ -119,6 +119,10 @@ "EmbeddingModelInstanceAdditionalInfoDict", "EmbeddingModelInstanceInfo", "EmbeddingModelInstanceInfoDict", + "EmbeddingRpcCountTokensParameter", + "EmbeddingRpcCountTokensParameterDict", + "EmbeddingRpcCountTokensReturns", + "EmbeddingRpcCountTokensReturnsDict", "EmbeddingRpcEmbedStringParameter", "EmbeddingRpcEmbedStringParameterDict", "EmbeddingRpcEmbedStringReturns", @@ -443,6 +447,8 @@ "PseudoEmbeddingChannelLoadModel", "PseudoEmbeddingChannelLoadModelDict", "PseudoEmbeddingDict", + "PseudoEmbeddingRpcCountTokens", + "PseudoEmbeddingRpcCountTokensDict", "PseudoEmbeddingRpcEmbedString", "PseudoEmbeddingRpcEmbedStringDict", "PseudoEmbeddingRpcGetLoadConfig", @@ -1766,6 +1772,27 @@ class EmbeddingRpcTokenizeReturnsDict(TypedDict): tokens: Sequence[float] +############################################################################### +# EmbeddingRpcCountTokensReturns +############################################################################### + + +class EmbeddingRpcCountTokensReturns( + LMStudioStruct["EmbeddingRpcCountTokensReturnsDict"], kw_only=True +): + token_count: int = field(name="tokenCount") + + +class EmbeddingRpcCountTokensReturnsDict(TypedDict): + """Corresponding typed dictionary definition for EmbeddingRpcCountTokensReturns. + + NOTE: Multi-word keys are defined using their camelCase form, + as that is what `to_dict()` emits, and what `_from_api_dict()` accepts. 
+ """ + + tokenCount: int + + ############################################################################### # EmbeddingChannelLoadModelCreationParameter ############################################################################### @@ -4852,6 +4879,7 @@ class GpuSetting(LMStudioStruct["GpuSettingDict"], kw_only=True): ratio: LlmLlamaAccelerationOffloadRatio | None = None main_gpu: int | None = field(name="mainGpu", default=None) split_strategy: LlmSplitStrategy | None = field(name="splitStrategy", default=None) + disabled_gpus: Sequence[int] | None = field(name="disabledGpus", default=None) class GpuSettingDict(TypedDict): @@ -4864,6 +4892,7 @@ class GpuSettingDict(TypedDict): ratio: NotRequired[LlmLlamaAccelerationOffloadRatio | None] mainGpu: NotRequired[int | None] splitStrategy: NotRequired[LlmSplitStrategy | None] + disabledGpus: NotRequired[Sequence[int] | None] ############################################################################### @@ -6479,7 +6508,7 @@ class EmbeddingChannelGetOrLoadToClientPacketUnloadingOtherJITModel( type: ClassVar[Annotated[Literal["unloadingOtherJITModel"], Meta(title="Type")]] = ( "unloadingOtherJITModel" ) - info: EmbeddingModelInstanceInfo + info: ModelInstanceInfo class EmbeddingChannelGetOrLoadToClientPacketUnloadingOtherJITModelDict(TypedDict): @@ -6490,7 +6519,7 @@ class EmbeddingChannelGetOrLoadToClientPacketUnloadingOtherJITModelDict(TypedDic """ type: Literal["unloadingOtherJITModel"] - info: EmbeddingModelInstanceInfoDict + info: ModelInstanceInfo ############################################################################### @@ -6650,7 +6679,7 @@ class LlmChannelGetOrLoadToClientPacketUnloadingOtherJITModel( type: ClassVar[Annotated[Literal["unloadingOtherJITModel"], Meta(title="Type")]] = ( "unloadingOtherJITModel" ) - info: LlmInstanceInfo + info: ModelInstanceInfo class LlmChannelGetOrLoadToClientPacketUnloadingOtherJITModelDict(TypedDict): @@ -6661,7 +6690,7 @@ class LlmChannelGetOrLoadToClientPacketUnloadingOtherJITModelDict(TypedDict): """ type: Literal["unloadingOtherJITModel"] - info: LlmInstanceInfoDict + info: ModelInstanceInfo ############################################################################### @@ -7247,6 +7276,52 @@ class PseudoEmbeddingRpcTokenizeDict(TypedDict): returns: EmbeddingRpcTokenizeReturnsDict +############################################################################### +# EmbeddingRpcCountTokensParameter +############################################################################### + + +class EmbeddingRpcCountTokensParameter( + LMStudioStruct["EmbeddingRpcCountTokensParameterDict"], kw_only=True +): + specifier: ModelSpecifier + input_string: str = field(name="inputString") + + +class EmbeddingRpcCountTokensParameterDict(TypedDict): + """Corresponding typed dictionary definition for EmbeddingRpcCountTokensParameter. + + NOTE: Multi-word keys are defined using their camelCase form, + as that is what `to_dict()` emits, and what `_from_api_dict()` accepts. 
+ """ + + specifier: ModelSpecifierDict + inputString: str + + +############################################################################### +# PseudoEmbeddingRpcCountTokens +############################################################################### + + +class PseudoEmbeddingRpcCountTokens( + LMStudioStruct["PseudoEmbeddingRpcCountTokensDict"], kw_only=True +): + parameter: EmbeddingRpcCountTokensParameter + returns: EmbeddingRpcCountTokensReturns + + +class PseudoEmbeddingRpcCountTokensDict(TypedDict): + """Corresponding typed dictionary definition for PseudoEmbeddingRpcCountTokens. + + NOTE: Multi-word keys are defined using their camelCase form, + as that is what `to_dict()` emits, and what `_from_api_dict()` accepts. + """ + + parameter: EmbeddingRpcCountTokensParameterDict + returns: EmbeddingRpcCountTokensReturnsDict + + EmbeddingChannelLoadModelToClientPacket = ( EmbeddingChannelLoadModelToClientPacketResolved | EmbeddingChannelLoadModelToClientPacketProgress @@ -7339,6 +7414,7 @@ class PseudoEmbedding(LMStudioStruct["PseudoEmbeddingDict"], kw_only=True): ) rpc_embed_string: PseudoEmbeddingRpcEmbedString = field(name="rpcEmbedString") rpc_tokenize: PseudoEmbeddingRpcTokenize = field(name="rpcTokenize") + rpc_count_tokens: PseudoEmbeddingRpcCountTokens = field(name="rpcCountTokens") channel_load_model: PseudoEmbeddingChannelLoadModel = field(name="channelLoadModel") channel_get_or_load: PseudoEmbeddingChannelGetOrLoad = field( name="channelGetOrLoad" @@ -7358,6 +7434,7 @@ class PseudoEmbeddingDict(TypedDict): rpcGetLoadConfig: PseudoEmbeddingRpcGetLoadConfigDict rpcEmbedString: PseudoEmbeddingRpcEmbedStringDict rpcTokenize: PseudoEmbeddingRpcTokenizeDict + rpcCountTokens: PseudoEmbeddingRpcCountTokensDict channelLoadModel: PseudoEmbeddingChannelLoadModelDict channelGetOrLoad: PseudoEmbeddingChannelGetOrLoadDict diff --git a/tests/test_kv_config.py b/tests/test_kv_config.py index 9d4f60b..e0535c0 100644 --- a/tests/test_kv_config.py +++ b/tests/test_kv_config.py @@ -35,8 +35,9 @@ "mainGpu": 0, "ratio": 0.5, "splitStrategy": "evenly", + "disabledGpus": [1, 2] } -SC_GPU_CONFIG = {"main_gpu": 0, "ratio": 0.5, "split_strategy": "evenly"} +SC_GPU_CONFIG = {"main_gpu": 0, "ratio": 0.5, "split_strategy": "evenly", "disabled_gpus": [1, 2]} LOAD_CONFIG_EMBEDDING: EmbeddingLoadModelConfigDict = { "contextLength": 1978, @@ -242,6 +243,7 @@ def test_snake_case_conversion( _NOT_YET_SUPPORTED_KEYS = { + "disabledGpus", "reasoningParsing", # "speculativeDecoding" scope "draftModel",