|
1 | | -from typing import AsyncIterable, Iterator, List, Optional, Union |
| 1 | +from typing import Any, AsyncIterable, Dict, Iterator, List, Optional, Union |
2 | 2 |
|
3 | 3 | from llmengine.api_engine import APIEngine |
4 | 4 | from llmengine.data_types import ( |
@@ -43,6 +43,10 @@ async def acreate( |
43 | 43 | frequency_penalty: Optional[float] = None, |
44 | 44 | top_k: Optional[int] = None, |
45 | 45 | top_p: Optional[float] = None, |
| 46 | + include_stop_str_in_output: Optional[bool] = False, |
| 47 | + guided_json: Optional[Dict[str, Any]] = None, |
| 48 | + guided_regex: Optional[str] = None, |
| 49 | + guided_choice: Optional[List[str]] = None, |
46 | 50 | timeout: int = COMPLETION_TIMEOUT, |
47 | 51 | stream: bool = False, |
48 | 52 | ) -> Union[CompletionSyncResponse, AsyncIterable[CompletionStreamResponse]]: |
@@ -102,6 +106,18 @@ async def acreate( |
102 | 106 | Float that controls the cumulative probability of the top tokens to consider. |
103 | 107 | Range: (0.0, 1.0]. 1.0 means consider all tokens. |
104 | 108 |
|
| 109 | + include_stop_str_in_output (Optional[bool]): |
| 110 | + Whether to include the stop sequence in the output. Defaults to False. |
| 111 | +
|
| 112 | + guided_json (Optional[Dict[str, Any]]): |
| 113 | + If specified, the output will follow the JSON schema. For examples see https://json-schema.org/learn/miscellaneous-examples. |
| 114 | +
|
| 115 | + guided_regex (Optional[str]): |
| 116 | + If specified, the output will follow the regex pattern. |
| 117 | +
|
| 118 | + guided_choice (Optional[List[str]]): |
| 119 | + If specified, the output will be exactly one of the choices. |
| 120 | +
|
105 | 121 | timeout (int): |
106 | 122 | Timeout in seconds. This is the maximum amount of time you are willing to wait for a response. |
107 | 123 |
|
@@ -198,6 +214,10 @@ async def _acreate_stream( |
198 | 214 | frequency_penalty=frequency_penalty, |
199 | 215 | top_k=top_k, |
200 | 216 | top_p=top_p, |
| 217 | + include_stop_str_in_output=include_stop_str_in_output, |
| 218 | + guided_json=guided_json, |
| 219 | + guided_regex=guided_regex, |
| 220 | + guided_choice=guided_choice, |
201 | 221 | timeout=timeout, |
202 | 222 | ) |
203 | 223 |
|
@@ -237,6 +257,10 @@ def create( |
237 | 257 | frequency_penalty: Optional[float] = None, |
238 | 258 | top_k: Optional[int] = None, |
239 | 259 | top_p: Optional[float] = None, |
| 260 | + include_stop_str_in_output: Optional[bool] = False, |
| 261 | + guided_json: Optional[Dict[str, Any]] = None, |
| 262 | + guided_regex: Optional[str] = None, |
| 263 | + guided_choice: Optional[List[str]] = None, |
240 | 264 | timeout: int = COMPLETION_TIMEOUT, |
241 | 265 | stream: bool = False, |
242 | 266 | ) -> Union[CompletionSyncResponse, Iterator[CompletionStreamResponse]]: |
@@ -297,6 +321,18 @@ def create( |
297 | 321 | Float that controls the cumulative probability of the top tokens to consider. |
298 | 322 | Range: (0.0, 1.0]. 1.0 means consider all tokens. |
299 | 323 |
|
| 324 | + include_stop_str_in_output (Optional[bool]): |
| 325 | + Whether to include the stop sequence in the output. Defaults to False. |
| 326 | +
|
| 327 | + guided_json (Optional[Dict[str, Any]]): |
| 328 | + If specified, the output will follow the JSON schema. For examples see https://json-schema.org/learn/miscellaneous-examples. |
| 329 | +
|
| 330 | + guided_regex (Optional[str]): |
| 331 | + If specified, the output will follow the regex pattern. |
| 332 | +
|
| 333 | + guided_choice (Optional[List[str]]): |
| 334 | + If specified, the output will be exactly one of the choices. |
| 335 | +
|
300 | 336 | timeout (int): |
301 | 337 | Timeout in seconds. This is the maximum amount of time you are willing to wait for a response. |
302 | 338 |
|
@@ -396,6 +432,10 @@ def _create_stream(**kwargs): |
396 | 432 | frequency_penalty=frequency_penalty, |
397 | 433 | top_k=top_k, |
398 | 434 | top_p=top_p, |
| 435 | + include_stop_str_in_output=include_stop_str_in_output, |
| 436 | + guided_json=guided_json, |
| 437 | + guided_regex=guided_regex, |
| 438 | + guided_choice=guided_choice, |
399 | 439 | ).dict() |
400 | 440 | response = cls.post_sync( |
401 | 441 | resource_name=f"v1/llm/completions-sync?model_endpoint_name={model}", |
|
0 commit comments