diff --git a/sdk-schema/lms-with-inferred-unions.json b/sdk-schema/lms-with-inferred-unions.json index 5497948..5d774b6 100644 --- a/sdk-schema/lms-with-inferred-unions.json +++ b/sdk-schema/lms-with-inferred-unions.json @@ -67,12 +67,16 @@ }, { "$ref": "#/definitions/presetManifest" + }, + { + "$ref": "#/definitions/modelManifest" } ], "discriminator": { "mapping": { "plugin": "#/definitions/pluginManifest", - "preset": "#/definitions/presetManifest" + "preset": "#/definitions/presetManifest", + "model": "#/definitions/modelManifest" }, "propertyName": "type" } @@ -84,10 +88,14 @@ "$ref": "#/definitions/kebabCase" }, "name": { - "$ref": "#/definitions/kebabCase" + "type": "string", + "pattern": "^[a-z0-9]+(?:-[a-z0-9]+)*$", + "minLength": 1, + "maxLength": 100 }, "description": { - "type": "string" + "type": "string", + "maxLength": 1000 }, "revision": { "type": "integer" @@ -425,7 +433,7 @@ "embeddingLoadModelConfig": { "type": "object", "properties": { - "gpuOffload": { + "gpu": { "$ref": "#/definitions/gpuSetting" }, "contextLength": { @@ -592,6 +600,9 @@ }, { "$ref": "#/definitions/errorDisplayDataGenericEngineDoesNotSupportFeature" + }, + { + "$ref": "#/definitions/errorDisplayDataGenericPresetNotFound" } ], "discriminator": { @@ -601,7 +612,8 @@ "generic.pathNotFound": "#/definitions/errorDisplayDataGenericPathNotFound", "generic.identifierNotFound": "#/definitions/errorDisplayDataGenericIdentifierNotFound", "generic.domainMismatch": "#/definitions/errorDisplayDataGenericDomainMismatch", - "generic.engineDoesNotSupportFeature": "#/definitions/errorDisplayDataGenericEngineDoesNotSupportFeature" + "generic.engineDoesNotSupportFeature": "#/definitions/errorDisplayDataGenericEngineDoesNotSupportFeature", + "generic.presetNotFound": "#/definitions/errorDisplayDataGenericPresetNotFound" }, "propertyName": "code" } @@ -656,6 +668,50 @@ "unknown" ] }, + "gpuSplitConfig": { + "type": "object", + "properties": { + "strategy": { + "$ref": "#/definitions/gpuSplitStrategy" + }, + "disabledGpus": { + "type": "array", + "items": { + "type": "integer", + "minimum": 0 + } + }, + "priority": { + "type": "array", + "items": { + "type": "integer", + "minimum": 0 + } + }, + "customRatio": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + } + } + }, + "required": [ + "strategy", + "disabledGpus", + "priority", + "customRatio" + ], + "additionalProperties": false + }, + "gpuSplitStrategy": { + "type": "string", + "enum": [ + "evenly", + "priorityOrder", + "custom" + ] + }, "kvConfigFieldDependency": { "type": "object", "properties": { @@ -688,10 +744,12 @@ "kvConfigLayerName": { "type": "string", "enum": [ + "currentlyEditing", "currentlyLoaded", "apiOverride", "conversationSpecific", "conversationGlobal", + "preset", "serverSession", "httpServerRequestOverride", "completeModeFormatting", @@ -852,9 +910,12 @@ "llmLoadModelConfig": { "type": "object", "properties": { - "gpuOffload": { + "gpu": { "$ref": "#/definitions/gpuSetting" }, + "gpuStrictVramCap": { + "type": "boolean" + }, "contextLength": { "type": "integer", "minimum": 1 @@ -1697,6 +1758,9 @@ }, "jsonSchema": { "$ref": "#/definitions/functionToolCallRequest/properties/arguments/additionalProperties" + }, + "gbnfGrammar": { + "type": "string" } }, "required": [ @@ -1708,7 +1772,8 @@ "type": "string", "enum": [ "none", - "json" + "json", + "gbnf" ] }, "llmToolArray": { @@ -2449,6 +2514,41 @@ ], "additionalProperties": false }, + "modelManifest": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": 
"model", + "default": "model", + "title": "Type" + }, + "virtual": { + "type": "boolean", + "const": true + }, + "owner": { + "$ref": "#/definitions/kebabCase" + }, + "name": { + "$ref": "#/definitions/artifactManifestBase/properties/name" + }, + "description": { + "$ref": "#/definitions/artifactManifestBase/properties/description" + }, + "revision": { + "$ref": "#/definitions/artifactManifestBase/properties/revision" + } + }, + "required": [ + "type", + "virtual", + "owner", + "name", + "description" + ], + "additionalProperties": false + }, "modelQuery": { "type": "object", "properties": { @@ -2500,7 +2600,7 @@ "$ref": "#/definitions/kebabCase" }, "name": { - "$ref": "#/definitions/kebabCase" + "$ref": "#/definitions/artifactManifestBase/properties/name" }, "description": { "$ref": "#/definitions/artifactManifestBase/properties/description" @@ -2537,7 +2637,7 @@ "$ref": "#/definitions/kebabCase" }, "name": { - "$ref": "#/definitions/kebabCase" + "$ref": "#/definitions/artifactManifestBase/properties/name" }, "description": { "$ref": "#/definitions/artifactManifestBase/properties/description" @@ -4149,6 +4249,9 @@ "predictionConfigStack": { "$ref": "#/definitions/kvConfigStack" }, + "fuzzyPresetIdentifier": { + "type": "string" + }, "ignoreServerSessionConfig": { "type": "boolean" } @@ -5585,6 +5688,49 @@ ], "additionalProperties": false }, + "errorDisplayDataGenericPresetNotFound": { + "type": "object", + "properties": { + "code": { + "type": "string", + "const": "generic.presetNotFound", + "default": "generic.presetNotFound", + "title": "Code" + }, + "specifiedFuzzyPresetIdentifier": { + "type": "string" + }, + "availablePresetsSample": { + "type": "array", + "items": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "name": { + "type": "string" + } + }, + "required": [ + "identifier", + "name" + ], + "additionalProperties": false + } + }, + "totalAvailablePresets": { + "type": "integer" + } + }, + "required": [ + "code", + "specifiedFuzzyPresetIdentifier", + "availablePresetsSample", + "totalAvailablePresets" + ], + "additionalProperties": false + }, "parsedFileIdentifierLocal": { "type": "object", "properties": { diff --git a/sdk-schema/lms.json b/sdk-schema/lms.json index 04a249e..936f506 100644 --- a/sdk-schema/lms.json +++ b/sdk-schema/lms.json @@ -67,6 +67,9 @@ }, { "$ref": "#/definitions/presetManifest" + }, + { + "$ref": "#/definitions/modelManifest" } ] }, @@ -77,10 +80,14 @@ "$ref": "#/definitions/kebabCase" }, "name": { - "$ref": "#/definitions/kebabCase" + "type": "string", + "pattern": "^[a-z0-9]+(?:-[a-z0-9]+)*$", + "minLength": 1, + "maxLength": 100 }, "description": { - "type": "string" + "type": "string", + "maxLength": 1000 }, "revision": { "type": "integer" @@ -492,7 +499,7 @@ "embeddingLoadModelConfig": { "type": "object", "properties": { - "gpuOffload": { + "gpu": { "$ref": "#/definitions/gpuSetting" }, "contextLength": { @@ -795,6 +802,47 @@ "supportedVersion" ], "additionalProperties": false + }, + { + "type": "object", + "properties": { + "code": { + "type": "string", + "const": "generic.presetNotFound" + }, + "specifiedFuzzyPresetIdentifier": { + "type": "string" + }, + "availablePresetsSample": { + "type": "array", + "items": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "name": { + "type": "string" + } + }, + "required": [ + "identifier", + "name" + ], + "additionalProperties": false + } + }, + "totalAvailablePresets": { + "type": "integer" + } + }, + "required": [ + "code", + 
"specifiedFuzzyPresetIdentifier", + "availablePresetsSample", + "totalAvailablePresets" + ], + "additionalProperties": false } ] }, @@ -869,6 +917,50 @@ "unknown" ] }, + "gpuSplitConfig": { + "type": "object", + "properties": { + "strategy": { + "$ref": "#/definitions/gpuSplitStrategy" + }, + "disabledGpus": { + "type": "array", + "items": { + "type": "integer", + "minimum": 0 + } + }, + "priority": { + "type": "array", + "items": { + "type": "integer", + "minimum": 0 + } + }, + "customRatio": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + } + } + }, + "required": [ + "strategy", + "disabledGpus", + "priority", + "customRatio" + ], + "additionalProperties": false + }, + "gpuSplitStrategy": { + "type": "string", + "enum": [ + "evenly", + "priorityOrder", + "custom" + ] + }, "kvConfigFieldDependency": { "type": "object", "properties": { @@ -930,10 +1022,12 @@ "kvConfigLayerName": { "type": "string", "enum": [ + "currentlyEditing", "currentlyLoaded", "apiOverride", "conversationSpecific", "conversationGlobal", + "preset", "serverSession", "httpServerRequestOverride", "completeModeFormatting", @@ -1217,9 +1311,12 @@ "llmLoadModelConfig": { "type": "object", "properties": { - "gpuOffload": { + "gpu": { "$ref": "#/definitions/gpuSetting" }, + "gpuStrictVramCap": { + "type": "boolean" + }, "contextLength": { "type": "integer", "minimum": 1 @@ -2115,6 +2212,9 @@ }, "jsonSchema": { "$ref": "#/definitions/functionToolCallRequest/properties/arguments/additionalProperties" + }, + "gbnfGrammar": { + "type": "string" } }, "required": [ @@ -2126,7 +2226,8 @@ "type": "string", "enum": [ "none", - "json" + "json", + "gbnf" ] }, "llmToolArray": { @@ -2866,6 +2967,39 @@ ], "additionalProperties": false }, + "modelManifest": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "model" + }, + "virtual": { + "type": "boolean", + "const": true + }, + "owner": { + "$ref": "#/definitions/kebabCase" + }, + "name": { + "$ref": "#/definitions/artifactManifestBase/properties/name" + }, + "description": { + "$ref": "#/definitions/artifactManifestBase/properties/description" + }, + "revision": { + "$ref": "#/definitions/artifactManifestBase/properties/revision" + } + }, + "required": [ + "type", + "virtual", + "owner", + "name", + "description" + ], + "additionalProperties": false + }, "modelQuery": { "type": "object", "properties": { @@ -2936,7 +3070,7 @@ "$ref": "#/definitions/kebabCase" }, "name": { - "$ref": "#/definitions/kebabCase" + "$ref": "#/definitions/artifactManifestBase/properties/name" }, "description": { "$ref": "#/definitions/artifactManifestBase/properties/description" @@ -2971,7 +3105,7 @@ "$ref": "#/definitions/kebabCase" }, "name": { - "$ref": "#/definitions/kebabCase" + "$ref": "#/definitions/artifactManifestBase/properties/name" }, "description": { "$ref": "#/definitions/artifactManifestBase/properties/description" @@ -4979,6 +5113,9 @@ "predictionConfigStack": { "$ref": "#/definitions/kvConfigStack" }, + "fuzzyPresetIdentifier": { + "type": "string" + }, "ignoreServerSessionConfig": { "type": "boolean" } diff --git a/sdk-schema/lmstudio-js b/sdk-schema/lmstudio-js index 7155368..2ae5ac6 160000 --- a/sdk-schema/lmstudio-js +++ b/sdk-schema/lmstudio-js @@ -1 +1 @@ -Subproject commit 71553680bd102eaeb86c0cbc1a42eb1908b6ddda +Subproject commit 2ae5ac65245f5b2a11c7b4a3fdb8065f19cc1d33 diff --git a/src/lmstudio/_kv_config.py b/src/lmstudio/_kv_config.py index 3c3ec8a..e1126f5 100644 --- a/src/lmstudio/_kv_config.py +++ b/src/lmstudio/_kv_config.py @@ 
-3,7 +3,7 @@ # Known KV config settings are defined in # https://github.com/lmstudio-ai/lmstudio-js/blob/main/packages/lms-kv-config/src/schema.ts from dataclasses import dataclass -from typing import Any, Iterable, Sequence, Type, TypeVar +from typing import Any, Container, Iterable, Sequence, Type, TypeVar from .sdk_api import LMStudioValueError from .schemas import DictSchema, DictObject, ModelSchema, MutableDictObject @@ -115,7 +115,7 @@ def update_client_config( "ropeFrequencyScale": CheckboxField("ropeFrequencyScale"), "tryMmap": ConfigField("tryMmap"), "acceleration": { - "offloadRatio": NestedKeyField("gpuOffload", "ratio"), + "offloadRatio": NestedKeyField("gpu", "ratio"), }, } @@ -126,8 +126,9 @@ def update_client_config( SUPPORTED_SERVER_KEYS: dict[str, DictObject] = { "load": { "gpuSplitConfig": MultiPartField( - "gpuOffload", ("mainGpu", "splitStrategy", "disabledGpus") + "gpu", ("mainGpu", "splitStrategy", "disabledGpus") ), + "gpuStrictVramCap": ConfigField("gpuStrictVramCap"), }, "embedding.load": { **_COMMON_MODEL_LOAD_KEYS, @@ -185,7 +186,9 @@ def update_client_config( # Define mappings to translate server KV configs to client config instances -def _iter_server_keys(*namespaces: str) -> Iterable[tuple[str, ConfigField]]: +def _iter_server_keys( + *namespaces: str, excluded: Container[str] = () +) -> Iterable[tuple[str, ConfigField]]: # Map dotted config field names to their client config field counterparts for namespace in namespaces: scopes: list[tuple[str, DictObject]] = [ @@ -193,6 +196,9 @@ def _iter_server_keys(*namespaces: str) -> Iterable[tuple[str, ConfigField]]: ] for prefix, scope in scopes: for k, v in scope.items(): + if k in excluded: + # 'load' config namespace currently includes some LLM-only config keys + continue prefixed_key = f"{prefix}.{k}" if prefix else k if isinstance(v, ConfigField): yield prefixed_key, v @@ -202,7 +208,9 @@ def _iter_server_keys(*namespaces: str) -> Iterable[tuple[str, ConfigField]]: FROM_SERVER_LOAD_LLM = dict(_iter_server_keys("load", "llm.load")) -FROM_SERVER_LOAD_EMBEDDING = dict(_iter_server_keys("load", "embedding.load")) +FROM_SERVER_LOAD_EMBEDDING = dict( + _iter_server_keys("load", "embedding.load", excluded="gpuStrictVramCap") +) FROM_SERVER_PREDICTION = dict(_iter_server_keys("llm.prediction")) FROM_SERVER_CONFIG = dict(_iter_server_keys(*SUPPORTED_SERVER_KEYS)) @@ -216,7 +224,7 @@ def _invert_config_keymap(from_server: FromServerKeymap) -> ToServerKeymap: to_server: ToServerKeymap = {} for server_key, config_field in sorted(from_server.items()): client_key = config_field.client_key - # There's at least one client field (gpuOffload) which maps to + # There's at least one client field (gpu) which maps to # multiple KV config fields, so don't expect a 1:1 mapping config_fields = to_server.setdefault(client_key, []) config_fields.append((server_key, config_field)) diff --git a/src/lmstudio/_sdk_models/__init__.py b/src/lmstudio/_sdk_models/__init__.py index 465c7af..f3d4a6e 100644 --- a/src/lmstudio/_sdk_models/__init__.py +++ b/src/lmstudio/_sdk_models/__init__.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: lms-with-inferred-unions.json -# timestamp: 2025-03-13T13:10:49+00:00 +# timestamp: 2025-03-20T15:15:46+00:00 from __future__ import annotations @@ -33,6 +33,8 @@ "AcceleratorDict", "ArtifactManifestBase", "ArtifactManifestBaseDict", + "AvailablePresetsSampleItem", + "AvailablePresetsSampleItemDict", "BackendNotification", "BackendNotificationDict", "BlockLocationAfterId", @@ -141,6 +143,8 @@ 
"ErrorDisplayDataGenericNoModelMatchingQueryDict", "ErrorDisplayDataGenericPathNotFound", "ErrorDisplayDataGenericPathNotFoundDict", + "ErrorDisplayDataGenericPresetNotFound", + "ErrorDisplayDataGenericPresetNotFoundDict", "ErrorDisplayDataGenericSpecificModelUnloaded", "ErrorDisplayDataGenericSpecificModelUnloadedDict", "FilesChannelRetrieveCreationParameter", @@ -181,6 +185,8 @@ "GetModelOptsDict", "GpuSetting", "GpuSettingDict", + "GpuSplitConfig", + "GpuSplitConfigDict", "InternalRetrievalResult", "InternalRetrievalResultDict", "InternalRetrievalResultEntry", @@ -333,6 +339,8 @@ "ModelInfoBaseDict", "ModelInstanceInfoBase", "ModelInstanceInfoBaseDict", + "ModelManifest", + "ModelManifestDict", "ModelQuery", "ModelQueryDict", "ModelSearchOpts", @@ -826,6 +834,18 @@ class SerializedLMSExtendedErrorDict(TypedDict): ] +DisabledGpu = Annotated[int, Meta(ge=0)] + + +PriorityItem = Annotated[int, Meta(ge=0)] + + +CustomRatioItem = Annotated[float, Meta(ge=0.0)] + + +GpuSplitStrategy = Literal["evenly", "priorityOrder", "custom"] + + ############################################################################### # KvConfigField ############################################################################### @@ -848,10 +868,12 @@ class KvConfigFieldDict(TypedDict): KvConfigLayerName = Literal[ + "currentlyEditing", "currentlyLoaded", "apiOverride", "conversationSpecific", "conversationGlobal", + "preset", "serverSession", "httpServerRequestOverride", "completeModeFormatting", @@ -1172,7 +1194,7 @@ class LlmManualPromptTemplateDict(TypedDict): LlmPromptTemplateType = Literal["manual", "jinja"] -LlmStructuredPredictionType = Literal["none", "json"] +LlmStructuredPredictionType = Literal["none", "json", "gbnf"] ############################################################################### @@ -2868,6 +2890,65 @@ class ErrorDisplayDataGenericEngineDoesNotSupportFeatureDict(TypedDict): supportedVersion: NotRequired[str | None] +############################################################################### +# AvailablePresetsSampleItem +############################################################################### + + +class AvailablePresetsSampleItem( + LMStudioStruct["AvailablePresetsSampleItemDict"], kw_only=True +): + identifier: str + name: str + + +class AvailablePresetsSampleItemDict(TypedDict): + """Corresponding typed dictionary definition for AvailablePresetsSampleItem. + + NOTE: Multi-word keys are defined using their camelCase form, + as that is what `to_dict()` emits, and what `_from_api_dict()` accepts. + """ + + identifier: str + name: str + + +############################################################################### +# ErrorDisplayDataGenericPresetNotFound +############################################################################### + + +class ErrorDisplayDataGenericPresetNotFound( + LMStudioStruct["ErrorDisplayDataGenericPresetNotFoundDict"], + kw_only=True, + tag_field="code", + tag="generic.presetNotFound", +): + code: ClassVar[Annotated[Literal["generic.presetNotFound"], Meta(title="Code")]] = ( + "generic.presetNotFound" + ) + specified_fuzzy_preset_identifier: str = field( + name="specifiedFuzzyPresetIdentifier" + ) + available_presets_sample: Sequence[AvailablePresetsSampleItem] = field( + name="availablePresetsSample" + ) + total_available_presets: int = field(name="totalAvailablePresets") + + +class ErrorDisplayDataGenericPresetNotFoundDict(TypedDict): + """Corresponding typed dictionary definition for ErrorDisplayDataGenericPresetNotFound. 
+ + NOTE: Multi-word keys are defined using their camelCase form, + as that is what `to_dict()` emits, and what `_from_api_dict()` accepts. + """ + + code: Literal["generic.presetNotFound"] + specifiedFuzzyPresetIdentifier: str + availablePresetsSample: Sequence[AvailablePresetsSampleItemDict] + totalAvailablePresets: int + + ############################################################################### # ParsedFileIdentifierLocal ############################################################################### @@ -4557,12 +4638,20 @@ class RepositoryChannelEnsureAuthenticatedToClientPacketAuthenticatedDict(TypedD type: Literal["authenticated"] -Description = str +Description = Annotated[str, Meta(max_length=1000)] + + +Name = Annotated[ + str, Meta(max_length=100, min_length=1, pattern="^[a-z0-9]+(?:-[a-z0-9]+)*$") +] Revision = int +DescriptionModel = str + + NoAutoDismiss = bool @@ -4663,8 +4752,10 @@ class RepositoryChannelEnsureAuthenticatedToClientPacketAuthenticatedDict(TypedD class ArtifactManifestBase(LMStudioStruct["ArtifactManifestBaseDict"], kw_only=True): owner: KebabCase - name: KebabCase - description: str + name: Annotated[ + str, Meta(max_length=100, min_length=1, pattern="^[a-z0-9]+(?:-[a-z0-9]+)*$") + ] + description: Annotated[str, Meta(max_length=1000)] revision: int | None = None @@ -4676,8 +4767,10 @@ class ArtifactManifestBaseDict(TypedDict): """ owner: str - name: str - description: str + name: Annotated[ + str, Meta(max_length=100, min_length=1, pattern="^[a-z0-9]+(?:-[a-z0-9]+)*$") + ] + description: Annotated[str, Meta(max_length=1000)] revision: NotRequired[int | None] @@ -4854,6 +4947,31 @@ class EmbeddingModelInstanceInfoDict(TypedDict): ParsedFileIdentifier = ParsedFileIdentifierLocal | ParsedFileIdentifierBase64 +############################################################################### +# GpuSplitConfig +############################################################################### + + +class GpuSplitConfig(LMStudioStruct["GpuSplitConfigDict"], kw_only=True): + strategy: GpuSplitStrategy + disabled_gpus: Sequence[DisabledGpu] = field(name="disabledGpus") + priority: Sequence[PriorityItem] + custom_ratio: Sequence[CustomRatioItem] = field(name="customRatio") + + +class GpuSplitConfigDict(TypedDict): + """Corresponding typed dictionary definition for GpuSplitConfig. + + NOTE: Multi-word keys are defined using their camelCase form, + as that is what `to_dict()` emits, and what `_from_api_dict()` accepts. + """ + + strategy: GpuSplitStrategy + disabledGpus: Sequence[int] + priority: Sequence[int] + customRatio: Sequence[float] + + ContentBlockStyle = ( ContentBlockStyleDefault | ContentBlockStyleCustomLabel | ContentBlockStyleThinking ) @@ -4893,7 +5011,8 @@ class GpuSettingDict(TypedDict): class LlmLoadModelConfig(LMStudioStruct["LlmLoadModelConfigDict"], kw_only=True): - gpu_offload: GpuSetting | None = field(name="gpuOffload", default=None) + gpu: GpuSetting | None = None + gpu_strict_vram_cap: bool | None = field(name="gpuStrictVramCap", default=None) context_length: Annotated[int, Meta(ge=1)] | None = field( name="contextLength", default=None ) @@ -4927,7 +5046,8 @@ class LlmLoadModelConfigDict(TypedDict): as that is what `to_dict()` emits, and what `_from_api_dict()` accepts. 
""" - gpuOffload: NotRequired[GpuSettingDict | None] + gpu: NotRequired[GpuSettingDict | None] + gpuStrictVramCap: NotRequired[bool | None] contextLength: NotRequired[Annotated[int, Meta(ge=1)] | None] ropeFrequencyBase: NotRequired[float | None] ropeFrequencyScale: NotRequired[float | None] @@ -5112,6 +5232,7 @@ class LlmStructuredPredictionSetting( ): type: LlmStructuredPredictionType json_schema: AdditionalProperties | None = field(name="jsonSchema", default=None) + gbnf_grammar: str | None = field(name="gbnfGrammar", default=None) class LlmStructuredPredictionSettingDict(TypedDict): @@ -5123,6 +5244,7 @@ class LlmStructuredPredictionSettingDict(TypedDict): type: LlmStructuredPredictionType jsonSchema: NotRequired[AdditionalProperties | None] + gbnfGrammar: NotRequired[str | None] BlockLocation = BlockLocationBeforeId | BlockLocationAfterId @@ -5287,6 +5409,37 @@ class ModelInstanceInfoBaseDict(TypedDict): architecture: NotRequired[str | None] +############################################################################### +# ModelManifest +############################################################################### + + +class ModelManifest( + LMStudioStruct["ModelManifestDict"], kw_only=True, tag_field="type", tag="model" +): + type: ClassVar[Annotated[Literal["model"], Meta(title="Type")]] = "model" + virtual: bool + owner: KebabCase + name: Name + description: Description + revision: Revision | None = None + + +class ModelManifestDict(TypedDict): + """Corresponding typed dictionary definition for ModelManifest. + + NOTE: Multi-word keys are defined using their camelCase form, + as that is what `to_dict()` emits, and what `_from_api_dict()` accepts. + """ + + type: Literal["model"] + virtual: bool + owner: str + name: str + description: str + revision: NotRequired[int | None] + + ############################################################################### # ModelQuery ############################################################################### @@ -5323,7 +5476,7 @@ class PluginManifest( type: ClassVar[Annotated[Literal["plugin"], Meta(title="Type")]] = "plugin" runner: PluginRunnerType owner: KebabCase - name: KebabCase + name: Name description: Description revision: Revision | None = None @@ -5353,7 +5506,7 @@ class PresetManifest( ): type: ClassVar[Annotated[Literal["preset"], Meta(title="Type")]] = "preset" owner: KebabCase - name: KebabCase + name: Name description: Description revision: Revision | None = None @@ -6048,7 +6201,7 @@ class SystemRpcNotifyParameter( LMStudioStruct["SystemRpcNotifyParameterDict"], kw_only=True ): title: Title - description: Description | None = None + description: DescriptionModel | None = None no_auto_dismiss: NoAutoDismiss | None = field(name="noAutoDismiss", default=None) @@ -6779,7 +6932,7 @@ class PluginsChannelSetGeneratorToClientPacketGenerateDict(TypedDict): token: str -ArtifactManifest = PluginManifest | PresetManifest +ArtifactManifest = PluginManifest | PresetManifest | ModelManifest ChatMessageData = ( @@ -6812,7 +6965,7 @@ class PluginsChannelSetGeneratorToClientPacketGenerateDict(TypedDict): class EmbeddingLoadModelConfig( LMStudioStruct["EmbeddingLoadModelConfigDict"], kw_only=True ): - gpu_offload: GpuSetting | None = field(name="gpuOffload", default=None) + gpu: GpuSetting | None = None context_length: Annotated[int, Meta(ge=1)] | None = field( name="contextLength", default=None ) @@ -6829,7 +6982,7 @@ class EmbeddingLoadModelConfigDict(TypedDict): as that is what `to_dict()` emits, and what `_from_api_dict()` accepts. 
""" - gpuOffload: NotRequired[GpuSettingDict | None] + gpu: NotRequired[GpuSettingDict | None] contextLength: NotRequired[Annotated[int, Meta(ge=1)] | None] ropeFrequencyBase: NotRequired[float | None] ropeFrequencyScale: NotRequired[float | None] @@ -6844,6 +6997,7 @@ class EmbeddingLoadModelConfigDict(TypedDict): | ErrorDisplayDataGenericIdentifierNotFound | ErrorDisplayDataGenericDomainMismatch | ErrorDisplayDataGenericEngineDoesNotSupportFeature + | ErrorDisplayDataGenericPresetNotFound ) @@ -8114,6 +8268,9 @@ class LlmChannelPredictCreationParameter( model_specifier: ModelSpecifier = field(name="modelSpecifier") history: ChatHistoryData prediction_config_stack: KvConfigStack = field(name="predictionConfigStack") + fuzzy_preset_identifier: str | None = field( + name="fuzzyPresetIdentifier", default=None + ) ignore_server_session_config: bool | None = field( name="ignoreServerSessionConfig", default=None ) @@ -8129,6 +8286,7 @@ class LlmChannelPredictCreationParameterDict(TypedDict): modelSpecifier: ModelSpecifierDict history: ChatHistoryDataDict predictionConfigStack: KvConfigStackDict + fuzzyPresetIdentifier: NotRequired[str | None] ignoreServerSessionConfig: NotRequired[bool | None] diff --git a/src/lmstudio/async_api.py b/src/lmstudio/async_api.py index 462e48c..906270b 100644 --- a/src/lmstudio/async_api.py +++ b/src/lmstudio/async_api.py @@ -502,7 +502,10 @@ async def load_new_instance( config: TLoadConfig | TLoadConfigDict | None = None, on_load_progress: ModelLoadingCallback | None = None, ) -> TAsyncModelHandle: - """Load this model with the given identifier and configuration.""" + """Load this model with the given identifier and configuration. + + Note: details of configuration fields may change in SDK feature releases. + """ handle: TAsyncModelHandle = await self._session._load_new_instance( self.model_key, instance_identifier, ttl, config, on_load_progress ) @@ -516,6 +519,11 @@ async def model( config: TLoadConfig | TLoadConfigDict | None = None, on_load_progress: ModelLoadingCallback | None = None, ) -> TAsyncModelHandle: + """Retrieve model with given identifier, or load it with given configuration. + + Note: configuration of retrieved model is NOT checked against the given config. + Note: details of configuration fields may change in SDK feature releases. + """ # Call _get_or_load directly, since we have a model identifier handle: TAsyncModelHandle = await self._session._get_or_load( self.model_key, ttl, config, on_load_progress @@ -786,7 +794,11 @@ async def model( config: TLoadConfig | TLoadConfigDict | None = None, on_load_progress: ModelLoadingCallback | None = None, ) -> TAsyncModelHandle: - """Get a handle to the specified model (loading it if necessary).""" + """Get a handle to the specified model (loading it if necessary). + + Note: configuration of retrieved model is NOT checked against the given config. + Note: details of configuration fields may change in SDK feature releases. + """ if model_key is None: # Should this raise an error if a config is supplied? return await self._get_any() @@ -816,7 +828,10 @@ async def load_new_instance( config: TLoadConfig | TLoadConfigDict | None = None, on_load_progress: ModelLoadingCallback | None = None, ) -> TAsyncModelHandle: - """Load the specified model with the given identifier and configuration.""" + """Load the specified model with the given identifier and configuration. + + Note: details of configuration fields may change in SDK feature releases. 
+ """ return await self._load_new_instance( model_key, instance_identifier, ttl, config, on_load_progress ) @@ -1033,7 +1048,10 @@ async def _complete_stream( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> AsyncPredictionStream[str] | AsyncPredictionStream[DictObject]: - """Request a one-off prediction without any context and stream the generated tokens.""" + """Request a one-off prediction without any context and stream the generated tokens. + + Note: details of configuration fields may change in SDK feature releases. + """ endpoint = CompletionEndpoint( model_specifier, prompt, @@ -1086,7 +1104,10 @@ async def _respond_stream( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> AsyncPredictionStream[str] | AsyncPredictionStream[DictObject]: - """Request a response in an ongoing assistant chat session and stream the generated tokens.""" + """Request a response in an ongoing assistant chat session and stream the generated tokens. + + Note: details of configuration fields may change in SDK feature releases. + """ if not isinstance(history, Chat): history = Chat.from_history(history) endpoint = ChatResponseEndpoint( @@ -1256,7 +1277,10 @@ async def complete_stream( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> AsyncPredictionStream[str] | AsyncPredictionStream[DictObject]: - """Request a one-off prediction without any context and stream the generated tokens.""" + """Request a one-off prediction without any context and stream the generated tokens. + + Note: details of configuration fields may change in SDK feature releases. + """ return await self._session._complete_stream( self.identifier, prompt, @@ -1304,7 +1328,10 @@ async def complete( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> PredictionResult[str] | PredictionResult[DictObject]: - """Request a one-off prediction without any context.""" + """Request a one-off prediction without any context. + + Note: details of configuration fields may change in SDK feature releases. + """ prediction_stream = await self._session._complete_stream( self.identifier, prompt, @@ -1357,7 +1384,10 @@ async def respond_stream( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> AsyncPredictionStream[str] | AsyncPredictionStream[DictObject]: - """Request a response in an ongoing assistant chat session and stream the generated tokens.""" + """Request a response in an ongoing assistant chat session and stream the generated tokens. + + Note: details of configuration fields may change in SDK feature releases. + """ return await self._session._respond_stream( self.identifier, history, @@ -1405,7 +1435,10 @@ async def respond( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> PredictionResult[str] | PredictionResult[DictObject]: - """Request a response in an ongoing assistant chat session.""" + """Request a response in an ongoing assistant chat session. + + Note: details of configuration fields may change in SDK feature releases. 
+ """ prediction_stream = await self._session._respond_stream( self.identifier, history, diff --git a/src/lmstudio/sync_api.py b/src/lmstudio/sync_api.py index 5ce86a4..b7c9558 100644 --- a/src/lmstudio/sync_api.py +++ b/src/lmstudio/sync_api.py @@ -690,7 +690,10 @@ def load_new_instance( config: TLoadConfig | TLoadConfigDict | None = None, on_load_progress: ModelLoadingCallback | None = None, ) -> TModelHandle: - """Load this model with the given identifier and configuration.""" + """Load this model with the given identifier and configuration. + + Note: details of configuration fields may change in SDK feature releases. + """ handle: TModelHandle = self._session._load_new_instance( self.model_key, instance_identifier, ttl, config, on_load_progress ) @@ -704,6 +707,11 @@ def model( config: TLoadConfig | TLoadConfigDict | None = None, on_load_progress: ModelLoadingCallback | None = None, ) -> TModelHandle: + """Retrieve model with default identifier, or load it with given configuration. + + Note: configuration of retrieved model is NOT checked against the given config. + Note: details of configuration fields may change in SDK feature releases. + """ # Call _get_or_load directly, since we have a model identifier handle: TModelHandle = self._session._get_or_load( self.model_key, ttl, config, on_load_progress @@ -951,7 +959,11 @@ def model( config: TLoadConfig | TLoadConfigDict | None = None, on_load_progress: ModelLoadingCallback | None = None, ) -> TModelHandle: - """Get a handle to the specified model (loading it if necessary).""" + """Get a handle to the specified model (loading it if necessary). + + Note: configuration of retrieved model is NOT checked against the given config. + Note: details of configuration fields may change in SDK feature releases. + """ if model_key is None: # Should this raise an error if a config is supplied? return self._get_any() @@ -981,7 +993,10 @@ def load_new_instance( config: TLoadConfig | TLoadConfigDict | None = None, on_load_progress: ModelLoadingCallback | None = None, ) -> TModelHandle: - """Load the specified model with the given identifier and configuration.""" + """Load the specified model with the given identifier and configuration. + + Note: details of configuration fields may change in SDK feature releases. + """ return self._load_new_instance( model_key, instance_identifier, ttl, config, on_load_progress ) @@ -1198,7 +1213,10 @@ def _complete_stream( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> PredictionStream[str] | PredictionStream[DictObject]: - """Request a one-off prediction without any context and stream the generated tokens.""" + """Request a one-off prediction without any context and stream the generated tokens. + + Note: details of configuration fields may change in SDK feature releases. + """ endpoint = CompletionEndpoint( model_specifier, prompt, @@ -1251,7 +1269,10 @@ def _respond_stream( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> PredictionStream[str] | PredictionStream[DictObject]: - """Request a response in an ongoing assistant chat session and stream the generated tokens.""" + """Request a response in an ongoing assistant chat session and stream the generated tokens. + + Note: details of configuration fields may change in SDK feature releases. 
+ """ if not isinstance(history, Chat): history = Chat.from_history(history) endpoint = ChatResponseEndpoint( @@ -1417,7 +1438,10 @@ def complete_stream( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> PredictionStream[str] | PredictionStream[DictObject]: - """Request a one-off prediction without any context and stream the generated tokens.""" + """Request a one-off prediction without any context and stream the generated tokens. + + Note: details of configuration fields may change in SDK feature releases. + """ return self._session._complete_stream( self.identifier, prompt, @@ -1465,7 +1489,10 @@ def complete( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> PredictionResult[str] | PredictionResult[DictObject]: - """Request a one-off prediction without any context.""" + """Request a one-off prediction without any context. + + Note: details of configuration fields may change in SDK feature releases. + """ prediction_stream = self._session._complete_stream( self.identifier, prompt, @@ -1518,7 +1545,10 @@ def respond_stream( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> PredictionStream[str] | PredictionStream[DictObject]: - """Request a response in an ongoing assistant chat session and stream the generated tokens.""" + """Request a response in an ongoing assistant chat session and stream the generated tokens. + + Note: details of configuration fields may change in SDK feature releases. + """ return self._session._respond_stream( self.identifier, history, @@ -1566,7 +1596,10 @@ def respond( on_prediction_fragment: PredictionFragmentCallback | None = None, on_prompt_processing_progress: PromptProcessingCallback | None = None, ) -> PredictionResult[str] | PredictionResult[DictObject]: - """Request a response in an ongoing assistant chat session.""" + """Request a response in an ongoing assistant chat session. + + Note: details of configuration fields may change in SDK feature releases. + """ prediction_stream = self._session._respond_stream( self.identifier, history, @@ -1608,7 +1641,10 @@ def act( ] | None = None, ) -> ActResult: - """Request a response (with implicit tool use) in an ongoing agent chat session.""" + """Request a response (with implicit tool use) in an ongoing agent chat session. + + Note: details of configuration fields may change in SDK feature releases. + """ start_time = time.perf_counter() # It is not yet possible to combine tool calling with requests for structured responses response_format = None @@ -1920,7 +1956,11 @@ def llm( ttl: int | None = DEFAULT_TTL, config: LlmLoadModelConfig | LlmLoadModelConfigDict | None = None, ) -> LLM: - """Access an LLM using the default global client.""" + """Access an LLM using the default global client. + + Note: configuration of retrieved model is NOT checked against the given config. + Note: details of configuration fields may change in SDK feature releases. + """ return get_default_client().llm.model(model_key, ttl=ttl, config=config) @@ -1932,7 +1972,11 @@ def embedding_model( ttl: int | None = DEFAULT_TTL, config: EmbeddingLoadModelConfig | EmbeddingLoadModelConfigDict | None = None, ) -> EmbeddingModel: - """Access an embedding model using the default global client.""" + """Access an embedding model using the default global client. 
+ + Note: configuration of retrieved model is NOT checked against the given config. + Note: details of configuration fields may change in SDK feature releases. + """ return get_default_client().embedding.model(model_key, ttl=ttl, config=config) diff --git a/tests/test_kv_config.py b/tests/test_kv_config.py index 9da638d..24abace 100644 --- a/tests/test_kv_config.py +++ b/tests/test_kv_config.py @@ -45,7 +45,7 @@ LOAD_CONFIG_EMBEDDING: EmbeddingLoadModelConfigDict = { "contextLength": 1978, - "gpuOffload": GPU_CONFIG, + "gpu": GPU_CONFIG, "keepModelInMemory": True, "ropeFrequencyBase": 10.0, "ropeFrequencyScale": 1.5, @@ -54,7 +54,7 @@ SC_LOAD_CONFIG_EMBEDDING = { "context_length": 1978, - "gpu_offload": SC_GPU_CONFIG, + "gpu": SC_GPU_CONFIG, "keep_model_in_memory": True, "rope_frequency_base": 10.0, "rope_frequency_scale": 1.5, @@ -65,7 +65,8 @@ "contextLength": 1978, "evalBatchSize": 42, "flashAttention": False, - "gpuOffload": GPU_CONFIG, + "gpu": GPU_CONFIG, + "gpuStrictVramCap": False, "keepModelInMemory": True, "llamaKCacheQuantizationType": "q8_0", "llamaVCacheQuantizationType": "f32", @@ -81,7 +82,8 @@ "context_length": 1978, "eval_batch_size": 42, "flash_attention": False, - "gpu_offload": SC_GPU_CONFIG, + "gpu": SC_GPU_CONFIG, + "gpu_strict_vram_cap": False, "keep_model_in_memory": True, "llama_k_cache_quantization_type": "q8_0", "llama_v_cache_quantization_type": "f32", @@ -334,6 +336,7 @@ def test_kv_stack_field_coverage( {"key": "llm.load.llama.useFp16ForKVCache", "value": True}, {"key": "llm.load.numExperts", "value": 0}, {"key": "llm.load.seed", "value": {"checked": True, "value": 313}}, + {"key": "load.gpuStrictVramCap", "value": False}, ] }, } @@ -455,6 +458,6 @@ def test_kv_stack_prediction_config_conflict() -> None: # (this will most likely involve changing the data model code generation) # def test_nested_unknown_keys() -> None: # config = LOAD_CONFIG_EMBEDDING.copy() -# LOAD_CONFIG_EMBEDDING["gpuOffload"] = SC_GPU_CONFIG +# LOAD_CONFIG_EMBEDDING["gpu"] = SC_GPU_CONFIG # with pytest.raises(msgspec.ValidationError): # EmbeddingLoadModelConfigStrict._from_api_dict(config)
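
A minimal client-side sketch of the config key rename carried by this patch, assuming the `lms.llm()` helper and load-config dict shapes shown in sync_api.py and the generated models above: "gpu" replaces the former "gpuOffload" key, and "gpuStrictVramCap" is the new LLM-only load setting that _kv_config.py excludes from the embedding keymap. The model key and values below are placeholders for illustration only.

import lmstudio as lms

# Placeholder model key and illustrative values.
# "gpu" replaces the old "gpuOffload" key; "ratio" is the offload ratio
# nested under it (cf. NestedKeyField("gpu", "ratio") in _kv_config.py).
# "gpuStrictVramCap" is the new boolean load setting for LLM loads only.
model = lms.llm(
    "my-model-key",
    config={
        "gpu": {"ratio": 0.5},
        "gpuStrictVramCap": False,
        "contextLength": 4096,
    },
)
print(model.respond("Hello!"))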