@@ -19,12 +19,35 @@ module OpenAI
19
19
. returns ( T . any ( OpenAI ::Models ::Audio ::Transcription , OpenAI ::Models ::Audio ::TranscriptionVerbose ) )
20
20
end
21
21
def create (
22
+ # The audio file object (not file name) to transcribe, in one of these formats:
23
+ # flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
22
24
file :,
25
+ # ID of the model to use. Only `whisper-1` (which is powered by our open source
26
+ # Whisper V2 model) is currently available.
23
27
model :,
28
+ # The language of the input audio. Supplying the input language in
29
+ # [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
30
+ # format will improve accuracy and latency.
24
31
language : nil ,
32
+ # An optional text to guide the model's style or continue a previous audio
33
+ # segment. The
34
+ # [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
35
+ # should match the audio language.
25
36
prompt : nil ,
37
+ # The format of the output, in one of these options: `json`, `text`, `srt`,
38
+ # `verbose_json`, or `vtt`.
26
39
response_format : nil ,
40
+ # The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
41
+ # output more random, while lower values like 0.2 will make it more focused and
42
+ # deterministic. If set to 0, the model will use
43
+ # [log probability](https://en.wikipedia.org/wiki/Log_probability) to
44
+ # automatically increase the temperature until certain thresholds are hit.
27
45
temperature : nil ,
46
+ # The timestamp granularities to populate for this transcription.
47
+ # `response_format` must be set to `verbose_json` to use timestamp granularities.
48
+ # Either or both of these options are supported: `word` or `segment`. Note: There
49
+ # is no additional latency for segment timestamps, but generating word timestamps
50
+ # incurs additional latency.
28
51
timestamp_granularities : nil ,
29
52
request_options : { }
30
53
)
0 commit comments