@@ -271,6 +271,131 @@ def create(
         """
         ...
 
+    @overload
+    def create(
+        self,
+        *,
+        messages: List[ChatCompletionMessageParam],
+        model: Union[
+            str,
+            Literal[
+                "gpt-4",
+                "gpt-4-0314",
+                "gpt-4-0613",
+                "gpt-4-32k",
+                "gpt-4-32k-0314",
+                "gpt-4-32k-0613",
+                "gpt-3.5-turbo",
+                "gpt-3.5-turbo-16k",
+                "gpt-3.5-turbo-0301",
+                "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-16k-0613",
+            ],
+        ],
+        stream: bool,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
+        """
+        Creates a model response for the given chat conversation.
+
+        Args:
+          messages: A list of messages comprising the conversation so far.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
+
+          model: ID of the model to use. See the
+              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
+              table for details on which models work with the Chat API.
+
+          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
+              sent as data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+
+          function_call: Controls how the model calls functions. "none" means the model will not call a
+              function and instead generates a message. "auto" means the model can pick
+              between generating a message or calling a function. Specifying a particular
+              function via `{"name": "my_function"}` forces the model to call that function.
+              "none" is the default when no functions are present. "auto" is the default if
+              functions are present.
+
+          functions: A list of functions the model may generate JSON inputs for.
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a json object that maps tokens (specified by their token ID in the
+              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+              bias is added to the logits generated by the model prior to sampling. The exact
+              effect will vary per model, but values between -1 and 1 should decrease or
+              increase likelihood of selection; values like -100 or 100 should result in a ban
+              or exclusive selection of the relevant token.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+
+              The total length of input tokens and generated tokens is limited by the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many chat completion choices to generate for each input message.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+
+          stop: Up to 4 sequences where the API will stop generating further tokens.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
     @required_args(["messages", "model"], ["messages", "model", "stream"])
     def create(
         self,
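
For context on this overload: when the `stream` argument is only known statically as a plain `bool`, the type checker cannot select the `Literal[True]`/`Literal[False]` overloads, so the call resolves to this one and the result is typed as the union `ChatCompletion | Stream[ChatCompletionChunk]`. A minimal usage sketch (assuming `OPENAI_API_KEY` is set in the environment; the `run` function and `flag` variable are illustrative names, not part of this diff):

```python
from openai import OpenAI, Stream

client = OpenAI()  # picks up OPENAI_API_KEY from the environment


def run(flag: bool) -> None:
    # `flag` is a plain bool, so this call matches the `stream: bool`
    # overload and `result` is ChatCompletion | Stream[ChatCompletionChunk].
    result = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say hello"}],
        stream=flag,
    )
    if isinstance(result, Stream):
        for chunk in result:  # each chunk is a ChatCompletionChunk delta
            print(chunk.choices[0].delta.content or "", end="")
    else:
        print(result.choices[0].message.content)
```

Narrowing with `isinstance` is what the union return type buys here: both branches type-check cleanly without casts.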
@@ -592,6 +717,131 @@ async def create(
         """
         ...
 
+    @overload
+    async def create(
+        self,
+        *,
+        messages: List[ChatCompletionMessageParam],
+        model: Union[
+            str,
+            Literal[
+                "gpt-4",
+                "gpt-4-0314",
+                "gpt-4-0613",
+                "gpt-4-32k",
+                "gpt-4-32k-0314",
+                "gpt-4-32k-0613",
+                "gpt-3.5-turbo",
+                "gpt-3.5-turbo-16k",
+                "gpt-3.5-turbo-0301",
+                "gpt-3.5-turbo-0613",
+                "gpt-3.5-turbo-16k-0613",
+            ],
+        ],
+        stream: bool,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+        functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
+        """
+        Creates a model response for the given chat conversation.
+
+        Args:
+          messages: A list of messages comprising the conversation so far.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
+
+          model: ID of the model to use. See the
+              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
+              table for details on which models work with the Chat API.
+
+          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
+              sent as data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+
+          function_call: Controls how the model calls functions. "none" means the model will not call a
+              function and instead generates a message. "auto" means the model can pick
+              between generating a message or calling a function. Specifying a particular
+              function via `{"name": "my_function"}` forces the model to call that function.
+              "none" is the default when no functions are present. "auto" is the default if
+              functions are present.
+
+          functions: A list of functions the model may generate JSON inputs for.
+
+          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
+
+              Accepts a json object that maps tokens (specified by their token ID in the
+              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
+              bias is added to the logits generated by the model prior to sampling. The exact
+              effect will vary per model, but values between -1 and 1 should decrease or
+              increase likelihood of selection; values like -100 or 100 should result in a ban
+              or exclusive selection of the relevant token.
+
+          max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion.
+
+              The total length of input tokens and generated tokens is limited by the model's
+              context length.
+              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
+              for counting tokens.
+
+          n: How many chat completion choices to generate for each input message.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
+
+          stop: Up to 4 sequences where the API will stop generating further tokens.
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic.
+
+              We generally recommend altering this or `top_p` but not both.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered.
+
+              We generally recommend altering this or `temperature` but not both.
+
+          user: A unique identifier representing your end-user, which can help OpenAI to monitor
+              and detect abuse.
+              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
+
     @required_args(["messages", "model"], ["messages", "model", "stream"])
     async def create(
         self,
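
The async overload mirrors the sync one exactly: with a plain `bool` stream flag, the awaited result is typed `ChatCompletion | AsyncStream[ChatCompletionChunk]` and can be narrowed the same way. A minimal sketch under the same assumptions (`run` and `flag` are again illustrative), using `AsyncOpenAI`:

```python
import asyncio

from openai import AsyncOpenAI, AsyncStream

client = AsyncOpenAI()  # picks up OPENAI_API_KEY from the environment


async def run(flag: bool) -> None:
    # Matches the async `stream: bool` overload added above.
    result = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say hello"}],
        stream=flag,
    )
    if isinstance(result, AsyncStream):
        async for chunk in result:  # chunks arrive as server-sent events
            print(chunk.choices[0].delta.content or "", end="")
    else:
        print(result.choices[0].message.content)


asyncio.run(run(True))
```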