@@ -47,6 +47,7 @@ async def acreate(
4747 guided_json : Optional [Dict [str , Any ]] = None ,
4848 guided_regex : Optional [str ] = None ,
4949 guided_choice : Optional [List [str ]] = None ,
50+ guided_grammar : Optional [str ] = None ,
5051 timeout : int = COMPLETION_TIMEOUT ,
5152 stream : bool = False ,
5253 ) -> Union [CompletionSyncResponse , AsyncIterable [CompletionStreamResponse ]]:
@@ -118,6 +119,9 @@ async def acreate(
118119 guided_choice (Optional[List[str]]):
119120 If specified, the output will be exactly one of the choices.
120121
122+ guided_grammar (Optional[str]):
123+ If specified, the output will follow the context-free grammar provided.
124+
121125 timeout (int):
122126 Timeout in seconds. This is the maximum amount of time you are willing to wait for a response.
123127
@@ -218,6 +222,7 @@ async def _acreate_stream(
218222 guided_json = guided_json ,
219223 guided_regex = guided_regex ,
220224 guided_choice = guided_choice ,
225+ guided_grammar = guided_grammar ,
221226 timeout = timeout ,
222227 )
223228
@@ -242,6 +247,11 @@ async def _acreate_sync(**kwargs) -> CompletionSyncResponse:
242247 frequency_penalty = frequency_penalty ,
243248 top_k = top_k ,
244249 top_p = top_p ,
250+ include_stop_str_in_output = include_stop_str_in_output ,
251+ guided_json = guided_json ,
252+ guided_regex = guided_regex ,
253+ guided_choice = guided_choice ,
254+ guided_grammar = guided_grammar ,
245255 )
246256
247257 @classmethod
@@ -261,6 +271,7 @@ def create(
261271 guided_json : Optional [Dict [str , Any ]] = None ,
262272 guided_regex : Optional [str ] = None ,
263273 guided_choice : Optional [List [str ]] = None ,
274+ guided_grammar : Optional [str ] = None ,
264275 timeout : int = COMPLETION_TIMEOUT ,
265276 stream : bool = False ,
266277 ) -> Union [CompletionSyncResponse , Iterator [CompletionStreamResponse ]]:
@@ -333,6 +344,9 @@ def create(
333344 guided_choice (Optional[List[str]]):
334345 If specified, the output will be exactly one of the choices.
335346
347+ guided_grammar (Optional[str]):
348+ If specified, the output will follow the context-free grammar provided.
349+
336350 timeout (int):
337351 Timeout in seconds. This is the maximum amount of time you are willing to wait for a response.
338352
@@ -419,6 +433,11 @@ def _create_stream(**kwargs):
419433 frequency_penalty = frequency_penalty ,
420434 top_k = top_k ,
421435 top_p = top_p ,
436+ include_stop_str_in_output = include_stop_str_in_output ,
437+ guided_json = guided_json ,
438+ guided_regex = guided_regex ,
439+ guided_choice = guided_choice ,
440+ guided_grammar = guided_grammar ,
422441 )
423442
424443 else :
@@ -436,6 +455,7 @@ def _create_stream(**kwargs):
436455 guided_json = guided_json ,
437456 guided_regex = guided_regex ,
438457 guided_choice = guided_choice ,
458+ guided_grammar = guided_grammar ,
439459 ).dict ()
440460 response = cls .post_sync (
441461 resource_name = f"v1/llm/completions-sync?model_endpoint_name={ model } " ,
0 commit comments