@@ -66,6 +66,7 @@ def chat_stream(
6666 logprobs : typing .Optional [bool ] = OMIT ,
6767 tool_choice : typing .Optional [V2ChatStreamRequestToolChoice ] = OMIT ,
6868 thinking : typing .Optional [Thinking ] = OMIT ,
69+ priority : typing .Optional [int ] = OMIT ,
6970 request_options : typing .Optional [RequestOptions ] = None ,
7071 ) -> typing .Iterator [V2ChatStreamResponse ]:
7172 """
@@ -158,6 +159,10 @@ def chat_stream(
158159
159160 thinking : typing.Optional[Thinking]
160161
162+ priority : typing.Optional[int]
164+ The priority of the request: a lower number means earlier handling, and the default of 0 is the highest priority.
165+ Higher-priority requests are handled first, and are dropped last when the system is under load.
165+
161166 request_options : typing.Optional[RequestOptions]
162167 Request-specific configuration.
163168
@@ -205,6 +210,7 @@ def chat_stream(
205210 logprobs = logprobs ,
206211 tool_choice = tool_choice ,
207212 thinking = thinking ,
213+ priority = priority ,
208214 request_options = request_options ,
209215 ) as r :
210216 yield from r .data
@@ -231,6 +237,7 @@ def chat(
231237 logprobs : typing .Optional [bool ] = OMIT ,
232238 tool_choice : typing .Optional [V2ChatRequestToolChoice ] = OMIT ,
233239 thinking : typing .Optional [Thinking ] = OMIT ,
240+ priority : typing .Optional [int ] = OMIT ,
234241 request_options : typing .Optional [RequestOptions ] = None ,
235242 ) -> V2ChatResponse :
236243 """
@@ -323,6 +330,10 @@ def chat(
323330
324331 thinking : typing.Optional[Thinking]
325332
333+ priority : typing.Optional[int]
334+ The priority of the request: a lower number means earlier handling, and the default of 0 is the highest priority.
335+ Higher-priority requests are handled first, and are dropped last when the system is under load.
336+
326337 request_options : typing.Optional[RequestOptions]
327338 Request-specific configuration.
328339
@@ -368,6 +379,7 @@ def chat(
368379 logprobs = logprobs ,
369380 tool_choice = tool_choice ,
370381 thinking = thinking ,
382+ priority = priority ,
371383 request_options = request_options ,
372384 )
373385 return _response .data
@@ -384,6 +396,7 @@ def embed(
384396 output_dimension : typing .Optional [int ] = OMIT ,
385397 embedding_types : typing .Optional [typing .Sequence [EmbeddingType ]] = OMIT ,
386398 truncate : typing .Optional [V2EmbedRequestTruncate ] = OMIT ,
399+ priority : typing .Optional [int ] = OMIT ,
387400 request_options : typing .Optional [RequestOptions ] = None ,
388401 ) -> EmbedByTypeResponse :
389402 """
@@ -437,6 +450,10 @@ def embed(
437450
438451 If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
439452
453+ priority : typing.Optional[int]
454+ The priority of the request: a lower number means earlier handling, and the default of 0 is the highest priority.
455+ Higher-priority requests are handled first, and are dropped last when the system is under load.
456+
440457 request_options : typing.Optional[RequestOptions]
441458 Request-specific configuration.
442459
@@ -470,6 +487,7 @@ def embed(
470487 output_dimension = output_dimension ,
471488 embedding_types = embedding_types ,
472489 truncate = truncate ,
490+ priority = priority ,
473491 request_options = request_options ,
474492 )
475493 return _response .data
@@ -482,6 +500,7 @@ def rerank(
482500 documents : typing .Sequence [str ],
483501 top_n : typing .Optional [int ] = OMIT ,
484502 max_tokens_per_doc : typing .Optional [int ] = OMIT ,
503+ priority : typing .Optional [int ] = OMIT ,
485504 request_options : typing .Optional [RequestOptions ] = None ,
486505 ) -> V2RerankResponse :
487506 """
@@ -509,6 +528,10 @@ def rerank(
509528 max_tokens_per_doc : typing.Optional[int]
510529 Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.
511530
531+ priority : typing.Optional[int]
532+ The priority of the request: a lower number means earlier handling, and the default of 0 is the highest priority.
533+ Higher-priority requests are handled first, and are dropped last when the system is under load.
534+
512535 request_options : typing.Optional[RequestOptions]
513536 Request-specific configuration.
514537
@@ -544,6 +567,7 @@ def rerank(
544567 documents = documents ,
545568 top_n = top_n ,
546569 max_tokens_per_doc = max_tokens_per_doc ,
570+ priority = priority ,
547571 request_options = request_options ,
548572 )
549573 return _response .data
@@ -586,6 +610,7 @@ async def chat_stream(
586610 logprobs : typing .Optional [bool ] = OMIT ,
587611 tool_choice : typing .Optional [V2ChatStreamRequestToolChoice ] = OMIT ,
588612 thinking : typing .Optional [Thinking ] = OMIT ,
613+ priority : typing .Optional [int ] = OMIT ,
589614 request_options : typing .Optional [RequestOptions ] = None ,
590615 ) -> typing .AsyncIterator [V2ChatStreamResponse ]:
591616 """
@@ -678,6 +703,10 @@ async def chat_stream(
678703
679704 thinking : typing.Optional[Thinking]
680705
706+ priority : typing.Optional[int]
707+ The priority of the request: a lower number means earlier handling, and the default of 0 is the highest priority.
708+ Higher-priority requests are handled first, and are dropped last when the system is under load.
709+
681710 request_options : typing.Optional[RequestOptions]
682711 Request-specific configuration.
683712
@@ -733,6 +762,7 @@ async def main() -> None:
733762 logprobs = logprobs ,
734763 tool_choice = tool_choice ,
735764 thinking = thinking ,
765+ priority = priority ,
736766 request_options = request_options ,
737767 ) as r :
738768 async for _chunk in r .data :
@@ -760,6 +790,7 @@ async def chat(
760790 logprobs : typing .Optional [bool ] = OMIT ,
761791 tool_choice : typing .Optional [V2ChatRequestToolChoice ] = OMIT ,
762792 thinking : typing .Optional [Thinking ] = OMIT ,
793+ priority : typing .Optional [int ] = OMIT ,
763794 request_options : typing .Optional [RequestOptions ] = None ,
764795 ) -> V2ChatResponse :
765796 """
@@ -852,6 +883,10 @@ async def chat(
852883
853884 thinking : typing.Optional[Thinking]
854885
886+ priority : typing.Optional[int]
887+ The priority of the request: a lower number means earlier handling, and the default of 0 is the highest priority.
888+ Higher-priority requests are handled first, and are dropped last when the system is under load.
889+
855890 request_options : typing.Optional[RequestOptions]
856891 Request-specific configuration.
857892
@@ -905,6 +940,7 @@ async def main() -> None:
905940 logprobs = logprobs ,
906941 tool_choice = tool_choice ,
907942 thinking = thinking ,
943+ priority = priority ,
908944 request_options = request_options ,
909945 )
910946 return _response .data
@@ -921,6 +957,7 @@ async def embed(
921957 output_dimension : typing .Optional [int ] = OMIT ,
922958 embedding_types : typing .Optional [typing .Sequence [EmbeddingType ]] = OMIT ,
923959 truncate : typing .Optional [V2EmbedRequestTruncate ] = OMIT ,
960+ priority : typing .Optional [int ] = OMIT ,
924961 request_options : typing .Optional [RequestOptions ] = None ,
925962 ) -> EmbedByTypeResponse :
926963 """
@@ -974,6 +1011,10 @@ async def embed(
9741011
9751012 If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
9761013
1014+ priority : typing.Optional[int]
1015+ The priority of the request: a lower number means earlier handling, and the default of 0 is the highest priority.
1016+ Higher-priority requests are handled first, and are dropped last when the system is under load.
1017+
9771018 request_options : typing.Optional[RequestOptions]
9781019 Request-specific configuration.
9791020
@@ -1015,6 +1056,7 @@ async def main() -> None:
10151056 output_dimension = output_dimension ,
10161057 embedding_types = embedding_types ,
10171058 truncate = truncate ,
1059+ priority = priority ,
10181060 request_options = request_options ,
10191061 )
10201062 return _response .data
@@ -1027,6 +1069,7 @@ async def rerank(
10271069 documents : typing .Sequence [str ],
10281070 top_n : typing .Optional [int ] = OMIT ,
10291071 max_tokens_per_doc : typing .Optional [int ] = OMIT ,
1072+ priority : typing .Optional [int ] = OMIT ,
10301073 request_options : typing .Optional [RequestOptions ] = None ,
10311074 ) -> V2RerankResponse :
10321075 """
@@ -1054,6 +1097,10 @@ async def rerank(
10541097 max_tokens_per_doc : typing.Optional[int]
10551098 Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.
10561099
1100+ priority : typing.Optional[int]
1101+ The priority of the request: a lower number means earlier handling, and the default of 0 is the highest priority.
1102+ Higher-priority requests are handled first, and are dropped last when the system is under load.
1103+
10571104 request_options : typing.Optional[RequestOptions]
10581105 Request-specific configuration.
10591106
@@ -1097,6 +1144,7 @@ async def main() -> None:
10971144 documents = documents ,
10981145 top_n = top_n ,
10991146 max_tokens_per_doc = max_tokens_per_doc ,
1147+ priority = priority ,
11001148 request_options = request_options ,
11011149 )
11021150 return _response .data
0 commit comments