Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/smolagents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,7 @@ def generate(
**kwargs,
) -> ChatMessage:
from vllm import SamplingParams # type: ignore
from vllm.sampling_params import StructuredOutputsParams # type: ignore

completion_kwargs = self._prepare_completion_kwargs(
messages=messages,
Expand All @@ -632,7 +633,7 @@ def generate(
**kwargs,
)
# Override the OpenAI schema for VLLM compatibility
guided_options_request = {"guided_json": response_format["json_schema"]["schema"]} if response_format else None
structured_outputs = StructuredOutputsParams(json=response_format["json_schema"]["schema"]) if response_format else None
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@qjflores suggested the following fix when he opened the issue:

# Convert old guided_options_request format to new structured_outputs
    structured_outputs_params = None
    if response_format:
        if "json_schema" in response_format:
            # Extract the JSON schema from the response_format
            json_schema = response_format["json_schema"]["schema"]
            structured_outputs_params = StructuredOutputsParams(json=json_schema)
        elif "choice" in response_format:
            # Handle choice-based structured outputs
            structured_outputs_params = StructuredOutputsParams(choice=response_format["choice"])
        elif "regex" in response_format:
            # Handle regex-based structured outputs
            structured_outputs_params = StructuredOutputsParams(regex=response_format["regex"])
        elif "grammar" in response_format:
            # Handle grammar-based structured outputs
            structured_outputs_params = StructuredOutputsParams(grammar=response_format["grammar"])
        elif "structural_tag" in response_format:
            # Handle structural tag-based structured outputs
            structured_outputs_params = StructuredOutputsParams(structural_tag=response_format["structural_tag"])
        else:
            print(f"WARNING: Unsupported response_format type: {response_format}")
            structured_outputs_params = None

However, if I understand correctly, JSON is the only structured-output parameter that is actually used:

additional_args["response_format"] = CODEAGENT_RESPONSE_FORMAT

CODEAGENT_RESPONSE_FORMAT description
"json_schema": {

So I simplified his solution to handle only the JSON schema case and incorporated it into this PR.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense to me


messages = completion_kwargs.pop("messages")
prepared_stop_sequences = completion_kwargs.pop("stop", [])
Expand All @@ -651,12 +652,12 @@ def generate(
temperature=kwargs.get("temperature", 0.0),
max_tokens=kwargs.get("max_tokens", 2048),
stop=prepared_stop_sequences,
structured_outputs=structured_outputs,
)

out = self.model.generate(
prompt,
sampling_params=sampling_params,
guided_options_request=guided_options_request,
**completion_kwargs,
)

Expand Down