2 changes: 1 addition & 1 deletion pyproject.toml
@@ -81,7 +81,7 @@ vision = [
"selenium",
]
vllm = [
"vllm",
"vllm>=0.10.2",
Contributor Author:
We could add some version-gating logic like the following:

import vllm
from packaging.version import parse

# Only build the kwargs that the installed vLLM version understands
if response_format:
    json_schema = response_format["json_schema"]["schema"]
    if parse(vllm.__version__) >= parse("0.10.2"):
        from vllm.sampling_params import StructuredOutputsParams

        structured_outputs = StructuredOutputsParams(json=json_schema)
    else:
        guided_options_request = {"guided_json": json_schema}

But I think that adds unnecessary complexity. Might be simpler to just force the version to be >= 0.10.2
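
If we do force the version, a small import-time guard could fail fast with a clearer message than the eventual ImportError. A minimal sketch (my suggestion, not in the PR), assuming packaging is installed:

import vllm
from packaging.version import parse

# Fail early with an actionable message instead of a late ImportError
if parse(vllm.__version__) < parse("0.10.2"):
    raise ImportError(
        f"VLLMModel requires vllm>=0.10.2, found {vllm.__version__}; please upgrade vllm."
    )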

Contributor Author (@suryabdev), Oct 10, 2025:
Tested with vLLM 0.10.0; with the new code it will break:

[screenshot of the resulting error]

"torch"
]
all = [
5 changes: 3 additions & 2 deletions src/smolagents/models.py
@@ -623,6 +623,7 @@ def generate(
         **kwargs,
     ) -> ChatMessage:
         from vllm import SamplingParams  # type: ignore
+        from vllm.sampling_params import StructuredOutputsParams  # type: ignore
 
         completion_kwargs = self._prepare_completion_kwargs(
             messages=messages,
@@ -632,7 +633,7 @@
             **kwargs,
         )
         # Override the OpenAI schema for VLLM compatibility
-        guided_options_request = {"guided_json": response_format["json_schema"]["schema"]} if response_format else None
+        structured_outputs = StructuredOutputsParams(json=response_format["json_schema"]["schema"]) if response_format else None
Contributor Author:
@qjflores suggested a fix when he opened the issue:

# Convert old guided_options_request format to new structured_outputs
structured_outputs_params = None
if response_format:
    if "json_schema" in response_format:
        # Extract the JSON schema from the response_format
        json_schema = response_format["json_schema"]["schema"]
        structured_outputs_params = StructuredOutputsParams(json=json_schema)
    elif "choice" in response_format:
        # Handle choice-based structured outputs
        structured_outputs_params = StructuredOutputsParams(choice=response_format["choice"])
    elif "regex" in response_format:
        # Handle regex-based structured outputs
        structured_outputs_params = StructuredOutputsParams(regex=response_format["regex"])
    elif "grammar" in response_format:
        # Handle grammar-based structured outputs
        structured_outputs_params = StructuredOutputsParams(grammar=response_format["grammar"])
    elif "structural_tag" in response_format:
        # Handle structural tag-based structured outputs
        structured_outputs_params = StructuredOutputsParams(structural_tag=response_format["structural_tag"])
    else:
        print(f"WARNING: Unsupported response_format type: {response_format}")
        structured_outputs_params = None

But if I understand correctly, JSON is the only structured output param that is used; smolagents always passes CODEAGENT_RESPONSE_FORMAT:

additional_args["response_format"] = CODEAGENT_RESPONSE_FORMAT

and the CODEAGENT_RESPONSE_FORMAT definition only contains a "json_schema" entry:

"json_schema": {

So I simplified his solution and incorporated it in the PR.

Reply:

makes sense to me


         messages = completion_kwargs.pop("messages")
         prepared_stop_sequences = completion_kwargs.pop("stop", [])
@@ -651,12 +652,12 @@
             temperature=kwargs.get("temperature", 0.0),
             max_tokens=kwargs.get("max_tokens", 2048),
             stop=prepared_stop_sequences,
+            structured_outputs=structured_outputs,
         )
 
         out = self.model.generate(
             prompt,
             sampling_params=sampling_params,
-            guided_options_request=guided_options_request,
             **completion_kwargs,
         )
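
For reference, a minimal standalone sketch of the new call path, assuming vllm>=0.10.2 (the schema and model name here are illustrative assumptions, not from the PR):

from vllm import LLM, SamplingParams
from vllm.sampling_params import StructuredOutputsParams

# Illustrative schema; smolagents would pass response_format["json_schema"]["schema"]
schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
}

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # hypothetical model choice
sampling_params = SamplingParams(
    temperature=0.0,
    max_tokens=256,
    structured_outputs=StructuredOutputsParams(json=schema),
)
out = llm.generate(["Answer in JSON: what is 2+2?"], sampling_params=sampling_params)
print(out[0].outputs[0].text)  # output text constrained to the schema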
