
Commit eda5922

vLLM: Move from guided_options_request to structured_outputs (#1805)
1 parent 3a06a1c commit eda5922

File tree

2 files changed: +6 −3 lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ vision = [
     "selenium",
 ]
 vllm = [
-    "vllm",
+    "vllm>=0.10.2",
     "torch"
 ]
 all = [
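A note on what this version floor buys: once the constraint lands, installing the project's vllm extra (e.g. pip install "smolagents[vllm]") should resolve to vLLM 0.10.2 or newer, which is assumed here to be the first release exposing the structured_outputs path used in models.py below.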

src/smolagents/models.py

Lines changed: 5 additions & 2 deletions
@@ -639,6 +639,7 @@ def generate(
         **kwargs,
     ) -> ChatMessage:
         from vllm import SamplingParams  # type: ignore
+        from vllm.sampling_params import StructuredOutputsParams  # type: ignore

         completion_kwargs = self._prepare_completion_kwargs(
             messages=messages,
@@ -648,7 +649,9 @@ def generate(
             **kwargs,
         )
         # Override the OpenAI schema for VLLM compatibility
-        guided_options_request = {"guided_json": response_format["json_schema"]["schema"]} if response_format else None
+        structured_outputs = (
+            StructuredOutputsParams(json=response_format["json_schema"]["schema"]) if response_format else None
+        )

         messages = completion_kwargs.pop("messages")
         prepared_stop_sequences = completion_kwargs.pop("stop", [])
@@ -667,12 +670,12 @@ def generate(
            temperature=kwargs.get("temperature", 0.0),
            max_tokens=kwargs.get("max_tokens", 2048),
            stop=prepared_stop_sequences,
+           structured_outputs=structured_outputs,
        )

        out = self.model.generate(
            prompt,
            sampling_params=sampling_params,
-           guided_options_request=guided_options_request,
            **completion_kwargs,
        )
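For readers tracking this migration, below is a minimal end-to-end sketch of the structured-outputs API the diff above switches to. The model name and JSON schema are illustrative assumptions, not part of this commit; only the StructuredOutputsParams import, its json field, and the structured_outputs argument on SamplingParams are taken from the diff itself.

# Minimal sketch of the new structured-outputs flow (vLLM >= 0.10.2).
# Model name and schema are hypothetical placeholders; only the
# StructuredOutputsParams / structured_outputs usage mirrors the commit above.
from vllm import LLM, SamplingParams
from vllm.sampling_params import StructuredOutputsParams

# Hypothetical JSON schema the completion must conform to.
schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
}

sampling_params = SamplingParams(
    temperature=0.0,
    max_tokens=2048,
    # Replaces the old guided_options_request={"guided_json": schema}
    # keyword that used to be passed to LLM.generate().
    structured_outputs=StructuredOutputsParams(json=schema),
)

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model
out = llm.generate("Answer in JSON: what is 2 + 2?", sampling_params=sampling_params)
print(out[0].outputs[0].text)

The design point of the migration is that the constraint now rides on SamplingParams rather than being a separate keyword to generate(), so it flows through any code path that already forwards sampling parameters.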
