File tree Expand file tree Collapse file tree 2 files changed +6
-3
lines changed Expand file tree Collapse file tree 2 files changed +6
-3
lines changed Original file line number Diff line number Diff line change @@ -82,7 +82,7 @@ vision = [
8282 " selenium" ,
8383]
8484vllm = [
85- "vllm",
85+ "vllm>=0.10.2",
8686 " torch"
8787]
8888all = [
Original file line number Diff line number Diff line change @@ -639,6 +639,7 @@ def generate(
639639 ** kwargs ,
640640 ) -> ChatMessage :
641641 from vllm import SamplingParams # type: ignore
642+ from vllm.sampling_params import StructuredOutputsParams  # type: ignore
642643
643644 completion_kwargs = self ._prepare_completion_kwargs (
644645 messages = messages ,
@@ -648,7 +649,9 @@ def generate(
648649 ** kwargs ,
649650 )
650651 # Override the OpenAI schema for VLLM compatibility
651- guided_options_request = {"guided_json": response_format["json_schema"]["schema"]} if response_format else None
652+ structured_outputs = (
653+     StructuredOutputsParams(json=response_format["json_schema"]["schema"]) if response_format else None
654+ )
652655
653656 messages = completion_kwargs .pop ("messages" )
654657 prepared_stop_sequences = completion_kwargs .pop ("stop" , [])
@@ -667,12 +670,12 @@ def generate(
667670 temperature = kwargs .get ("temperature" , 0.0 ),
668671 max_tokens = kwargs .get ("max_tokens" , 2048 ),
669672 stop = prepared_stop_sequences ,
673+ structured_outputs=structured_outputs,
670674 )
671675
672676 out = self .model .generate (
673677 prompt ,
674678 sampling_params = sampling_params ,
675- guided_options_request=guided_options_request,
676679 ** completion_kwargs ,
677680 )
678681
You can’t perform that action at this time.
0 commit comments