
Commit eda5922

vLLM: Move from guided_options_request to structured_outputs (#1805)
1 parent 3a06a1c commit eda5922

File tree

2 files changed: +6 −3 lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ vision = [
     "selenium",
 ]
 vllm = [
-    "vllm",
+    "vllm>=0.10.2",
     "torch"
 ]
 all = [
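A note on what this version floor buys: once the constraint lands, installing the project's vllm extra (e.g. pip install "smolagents[vllm]") should resolve to vLLM 0.10.2 or newer, which is assumed here to be the first release exposing the structured_outputs path used in models.py below.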

src/smolagents/models.py

Lines changed: 5 additions & 2 deletions
@@ -639,6 +639,7 @@ def generate(
         **kwargs,
     ) -> ChatMessage:
         from vllm import SamplingParams  # type: ignore
+        from vllm.sampling_params import StructuredOutputsParams  # type: ignore

         completion_kwargs = self._prepare_completion_kwargs(
             messages=messages,
@@ -648,7 +649,9 @@ def generate(
             **kwargs,
         )
         # Override the OpenAI schema for VLLM compatibility
-        guided_options_request = {"guided_json": response_format["json_schema"]["schema"]} if response_format else None
+        structured_outputs = (
+            StructuredOutputsParams(json=response_format["json_schema"]["schema"]) if response_format else None
+        )

         messages = completion_kwargs.pop("messages")
         prepared_stop_sequences = completion_kwargs.pop("stop", [])
@@ -667,12 +670,12 @@ def generate(
            temperature=kwargs.get("temperature", 0.0),
            max_tokens=kwargs.get("max_tokens", 2048),
            stop=prepared_stop_sequences,
+           structured_outputs=structured_outputs,
        )

        out = self.model.generate(
            prompt,
            sampling_params=sampling_params,
-           guided_options_request=guided_options_request,
            **completion_kwargs,
        )
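For readers tracking this migration, below is a minimal end-to-end sketch of the structured-outputs API the diff above switches to. The model name and JSON schema are illustrative assumptions, not part of this commit; only the StructuredOutputsParams import, its json field, and the structured_outputs argument on SamplingParams are taken from the diff itself.

# Minimal sketch of the new structured-outputs flow (vLLM >= 0.10.2).
# Model name and schema are hypothetical placeholders; only the
# StructuredOutputsParams / structured_outputs usage mirrors the commit above.
from vllm import LLM, SamplingParams
from vllm.sampling_params import StructuredOutputsParams

# Hypothetical JSON schema the completion must conform to.
schema = {
    "type": "object",
    "properties": {"answer": {"type": "string"}},
    "required": ["answer"],
}

sampling_params = SamplingParams(
    temperature=0.0,
    max_tokens=2048,
    # Replaces the old guided_options_request={"guided_json": schema}
    # keyword that used to be passed to LLM.generate().
    structured_outputs=StructuredOutputsParams(json=schema),
)

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model
out = llm.generate("Answer in JSON: what is 2 + 2?", sampling_params=sampling_params)
print(out[0].outputs[0].text)

The design point of the migration is that the constraint now rides on SamplingParams rather than being a separate keyword to generate(), so it flows through any code path that already forwards sampling parameters.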
