|
9 | 9 | "description": "Automatic speech recognition (ASR) models convert a speech signal, typically an audio input, to text." |
10 | 10 | }, |
11 | 11 | "tags": [], |
12 | | - "properties": [ |
13 | | - { |
14 | | - "property_id": "beta", |
15 | | - "value": "true" |
16 | | - } |
17 | | - ], |
| 12 | + "properties": [], |
18 | 13 | "schema": { |
19 | 14 | "input": { |
20 | 15 | "type": "object", |
|
44 | 39 | }, |
45 | 40 | "prefix": { |
46 | 41 | "type": "string", |
47 | | - "description": "The prefix it appended the beginning of the output of the transcription and can guide the transcription result." |
| 42 | + "description": "The prefix is appended to the beginning of the output of the transcription and can guide the transcription result." |
48 | 43 | } |
49 | 44 | }, |
50 | 45 | "required": [ |
|
85 | 80 | "description": "The total number of words in the transcription." |
86 | 81 | }, |
87 | 82 | "segments": { |
88 | | - "type": "object", |
89 | | - "properties": { |
90 | | - "start": { |
91 | | - "type": "number", |
92 | | - "description": "The starting time of the segment within the audio, in seconds." |
93 | | - }, |
94 | | - "end": { |
95 | | - "type": "number", |
96 | | - "description": "The ending time of the segment within the audio, in seconds." |
97 | | - }, |
98 | | - "text": { |
99 | | - "type": "string", |
100 | | - "description": "The transcription of the segment." |
101 | | - }, |
102 | | - "temperature": { |
103 | | - "type": "number", |
104 | | - "description": "The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs." |
105 | | - }, |
106 | | - "avg_logprob": { |
107 | | - "type": "number", |
108 | | - "description": "The average log probability of the predictions for the words in this segment, indicating overall confidence." |
109 | | - }, |
110 | | - "compression_ratio": { |
111 | | - "type": "number", |
112 | | - "description": "The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process." |
113 | | - }, |
114 | | - "no_speech_prob": { |
115 | | - "type": "number", |
116 | | - "description": "The probability that the segment contains no speech, represented as a decimal between 0 and 1." |
117 | | - }, |
118 | | - "words": { |
119 | | - "type": "array", |
120 | | - "items": { |
121 | | - "type": "object", |
122 | | - "properties": { |
123 | | - "word": { |
124 | | - "type": "string", |
125 | | - "description": "The individual word transcribed from the audio." |
126 | | - }, |
127 | | - "start": { |
128 | | - "type": "number", |
129 | | - "description": "The starting time of the word within the audio, in seconds." |
130 | | - }, |
131 | | - "end": { |
132 | | - "type": "number", |
133 | | - "description": "The ending time of the word within the audio, in seconds." |
| 83 | + "type": "array", |
| 84 | + "items": { |
| 85 | + "type": "object", |
| 86 | + "properties": { |
| 87 | + "start": { |
| 88 | + "type": "number", |
| 89 | + "description": "The starting time of the segment within the audio, in seconds." |
| 90 | + }, |
| 91 | + "end": { |
| 92 | + "type": "number", |
| 93 | + "description": "The ending time of the segment within the audio, in seconds." |
| 94 | + }, |
| 95 | + "text": { |
| 96 | + "type": "string", |
| 97 | + "description": "The transcription of the segment." |
| 98 | + }, |
| 99 | + "temperature": { |
| 100 | + "type": "number", |
| 101 | + "description": "The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs." |
| 102 | + }, |
| 103 | + "avg_logprob": { |
| 104 | + "type": "number", |
| 105 | + "description": "The average log probability of the predictions for the words in this segment, indicating overall confidence." |
| 106 | + }, |
| 107 | + "compression_ratio": { |
| 108 | + "type": "number", |
| 109 | + "description": "The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process." |
| 110 | + }, |
| 111 | + "no_speech_prob": { |
| 112 | + "type": "number", |
| 113 | + "description": "The probability that the segment contains no speech, represented as a decimal between 0 and 1." |
| 114 | + }, |
| 115 | + "words": { |
| 116 | + "type": "array", |
| 117 | + "items": { |
| 118 | + "type": "object", |
| 119 | + "properties": { |
| 120 | + "word": { |
| 121 | + "type": "string", |
| 122 | + "description": "The individual word transcribed from the audio." |
| 123 | + }, |
| 124 | + "start": { |
| 125 | + "type": "number", |
| 126 | + "description": "The starting time of the word within the audio, in seconds." |
| 127 | + }, |
| 128 | + "end": { |
| 129 | + "type": "number", |
| 130 | + "description": "The ending time of the word within the audio, in seconds." |
| 131 | + } |
134 | 132 | } |
135 | 133 | } |
136 | 134 | } |
|
0 commit comments