11#nullable enable
22using System ;
3- using System . Collections . Generic ;
43using System . IO ;
54using System . Net . Http ;
65using System . Threading ;
7- using System . Threading . Tasks ;
8- using Newtonsoft . Json ;
6+ using Cysharp . Threading . Tasks ;
97
108namespace Mochineko . Whisper_API
119{
1210 /// <summary>
1311 /// OpenAI Whisper transcription API.
1412 /// https://platform.openai.com/docs/api-reference/audio/create
1513 /// </summary>
16- public sealed class Transcription
14+ public static class Transcription
1715 {
1816 private const string EndPoint = "https://api.openai.com/v1/audio/transcriptions" ;
1917
20- private readonly IReadOnlyDictionary < string , string > headers ;
21- private readonly TranscriptionRequestBody requestBody ;
22-
23- private static HttpClient HttpClient
24- => HttpClientPool . PooledClient ;
25-
2618 /// <summary>
27- /// Create an instance of Whisper transcription API connection .
19+ /// Transcribes speech audio into text by Whisper transcription API.
2820 /// </summary>
29- /// <param name="apiKey">API key generated by OpenAI</param>
30- /// <param name="model">Speech recognition model</param>
31- /// <exception cref="ArgumentNullException">API Key must be set</exception>
32- public Transcription ( string apiKey , Model model = Model . Whisper1 )
21+ public static async UniTask < string > TranscribeAsync (
22+ string apiKey ,
23+ HttpClient httpClient ,
24+ Stream fileStream ,
25+ string fileName ,
26+ Model model ,
27+ CancellationToken cancellationToken ,
28+ string ? prompt = null ,
29+ string ? responseFormat = null ,
30+ float ? temperature = null ,
31+ string ? language = null
32+ )
3333 {
34- if ( string . IsNullOrEmpty ( apiKey ) )
35- {
36- throw new ArgumentNullException ( nameof ( apiKey ) ) ;
37- }
34+ cancellationToken . ThrowIfCancellationRequested ( ) ;
3835
39- this . headers = CreateHeader ( apiKey ) ;
40- this . requestBody = new TranscriptionRequestBody ( string . Empty , model ) ;
41- }
42-
43- /// <summary>
44- /// Create an instance of Whisper transcription API connection.
45- /// </summary>
46- /// <param name="apiKey">API key generated by OpenAI</param>
47- /// <param name="requestBody">Request parameters</param>
48- /// <exception cref="ArgumentNullException">API Key must be set</exception>
49- public Transcription ( string apiKey , TranscriptionRequestBody requestBody )
50- {
5136 if ( string . IsNullOrEmpty ( apiKey ) )
5237 {
5338 throw new ArgumentNullException ( nameof ( apiKey ) ) ;
5439 }
5540
56- this . headers = CreateHeader ( apiKey ) ;
57- this . requestBody = requestBody ;
58- }
59-
60- private static IReadOnlyDictionary < string , string > CreateHeader ( string apiKey )
61- => new Dictionary < string , string >
41+ if ( string . IsNullOrEmpty ( fileName ) )
6242 {
63- [ "Authorization" ] = $ "Bearer { apiKey } " ,
64- } ;
43+ throw new ArgumentNullException ( nameof ( fileName ) ) ;
44+ }
6545
66- private static HttpRequestMessage CreateRequestMessage (
67- IReadOnlyDictionary < string , string > headers ,
68- TranscriptionRequestBody requestBody ,
69- Stream fileStream )
70- {
71- var requestMessage = new HttpRequestMessage ( HttpMethod . Post , EndPoint ) ;
72- foreach ( var header in headers )
46+ if ( ! TranscriptionRequestBody . IsAvailableFormat ( fileName ) )
7347 {
74- requestMessage . Headers . Add ( header . Key , header . Value ) ;
48+ throw new InvalidDataException ( fileName ) ;
7549 }
50+
51+ var requestBody = new TranscriptionRequestBody (
52+ fileName ,
53+ model ,
54+ prompt ,
55+ responseFormat ,
56+ temperature ,
57+ language ) ;
58+
59+ using var requestMessage = new HttpRequestMessage ( HttpMethod . Post , EndPoint ) ;
60+ requestMessage . Headers . Add ( "Authorization" , $ "Bearer { apiKey } ") ;
7661
7762 var requestContent = new MultipartFormDataContent ( ) ;
7863
7964 requestContent . Add (
8065 content : new StringContent (
81- content : $ " { requestBody . Model } " ,
66+ content : requestBody . Model ,
8267 encoding : System . Text . Encoding . UTF8 ) ,
8368 name : "model" ) ;
8469
@@ -89,92 +74,9 @@ private static HttpRequestMessage CreateRequestMessage(
8974
9075 requestMessage . Content = requestContent ;
9176
92- return requestMessage ;
93- }
94-
95- /// <summary>
96- /// Transcribes speech audio into text by Whisper transcription API.
97- /// </summary>
98- /// <param name="filePath"></param>
99- /// <param name="cancellationToken"></param>
100- /// <returns>Response text</returns>
101- /// <exception cref="Exception">System exceptions</exception>
102- /// <exception cref="APIErrorException">API error response</exception>
103- /// <exception cref="HttpRequestException">Network error</exception>
104- /// <exception cref="TaskCanceledException">Cancellation or timeout</exception>
105- /// <exception cref="JsonSerializationException">JSON error</exception>
106- /// <exception cref="ArgumentNullException">File path is null or empty</exception>
107- /// <exception cref="FileNotFoundException">Audio file not found</exception>
108- /// <exception cref="InvalidDataException">Not available audio format</exception>
109- /// <exception cref="OperationCanceledException">Cancelled on called</exception>
110- public async Task < string > TranscribeFromFileAsync ( string filePath , CancellationToken cancellationToken )
111- {
112- cancellationToken . ThrowIfCancellationRequested ( ) ;
113-
114- if ( string . IsNullOrEmpty ( filePath ) )
115- {
116- throw new ArgumentNullException ( nameof ( filePath ) ) ;
117- }
118-
119- if ( ! File . Exists ( filePath ) )
120- {
121- throw new FileNotFoundException ( filePath ) ;
122- }
123-
124- if ( ! TranscriptionRequestBody . IsAvailableFormat ( filePath ) )
125- {
126- throw new InvalidDataException ( filePath ) ;
127- }
128-
129- await using var stream = File . OpenRead ( filePath ) ;
130-
131- return await TranscribeAsync (
132- fileStream : stream ,
133- fileName : Path . GetFileName ( filePath ) ,
134- cancellationToken ) ;
135- }
136-
137- /// <summary>
138- /// Transcribes speech audio into text by Whisper transcription API.
139- /// </summary>
140- /// <param name="fileStream">File data stream of speech audio</param>
141- /// <param name="fileName">File name of speech audio</param>
142- /// <param name="cancellationToken">Cancellation token</param>
143- /// <returns>Response text.</returns>
144- /// <exception cref="Exception">System exceptions</exception>
145- /// <exception cref="APIErrorException">API error response</exception>
146- /// <exception cref="HttpRequestException">Network error</exception>
147- /// <exception cref="TaskCanceledException">Cancellation or timeout</exception>
148- /// <exception cref="JsonSerializationException">JSON error</exception>
149- /// <exception cref="ArgumentNullException">Argument is null or empty</exception>
150- /// <exception cref="InvalidDataException">Not available audio format</exception>
151- /// <exception cref="OperationCanceledException">Cancelled on called</exception>
152- public async Task < string > TranscribeAsync (
153- Stream fileStream ,
154- string fileName ,
155- CancellationToken cancellationToken )
156- {
157- cancellationToken . ThrowIfCancellationRequested ( ) ;
158-
159- if ( string . IsNullOrEmpty ( fileName ) )
160- {
161- throw new ArgumentNullException ( nameof ( fileName ) ) ;
162- }
163-
164- if ( ! TranscriptionRequestBody . IsAvailableFormat ( fileName ) )
165- {
166- throw new InvalidDataException ( fileName ) ;
167- }
168-
169- requestBody . File = fileName ;
170-
171- using var requestMessage = CreateRequestMessage (
172- headers ,
173- requestBody ,
174- fileStream ) ;
175-
17677 // Post request and receive response
177- using var responseMessage = await HttpClient . SendAsync ( requestMessage , cancellationToken ) ;
78+ using var responseMessage = await httpClient
79+ . SendAsync ( requestMessage , cancellationToken ) ;
17880 if ( responseMessage == null )
17981 {
18082 throw new Exception ( $ "[Whisper_API.Transcription] HttpResponseMessage is null.") ;
@@ -209,5 +111,46 @@ public async Task<string> TranscribeAsync(
209111 }
210112 }
211113 }
114+
115+ /// <summary>
116+ /// Transcribes speech audio into text by Whisper transcription API.
/// This overload takes a path to a local file: it validates the path, opens the file
/// for reading, and delegates to <c>TranscribeAsync</c> with the opened stream and the
/// file name extracted from the path. All other arguments are forwarded unchanged.
117+ /// </summary>
/// <param name="apiKey">API key; forwarded unchanged to <c>TranscribeAsync</c> (not validated in this method).</param>
/// <param name="httpClient">HTTP client; forwarded unchanged to <c>TranscribeAsync</c> (not validated in this method).</param>
/// <param name="filePath">Path of the audio file to read. Must be non-empty and refer to an existing file.</param>
/// <param name="model">Speech recognition model; forwarded unchanged to <c>TranscribeAsync</c>.</param>
/// <param name="cancellationToken">Checked once on entry, then forwarded to <c>TranscribeAsync</c>.</param>
/// <param name="prompt">Optional; forwarded unchanged. NOTE(review): presumably the API request field of the same name - confirm against TranscriptionRequestBody.</param>
/// <param name="responseFormat">Optional; forwarded unchanged. NOTE(review): presumably the API request field of the same name - confirm against TranscriptionRequestBody.</param>
/// <param name="temperature">Optional; forwarded unchanged. NOTE(review): presumably the API request field of the same name - confirm against TranscriptionRequestBody.</param>
/// <param name="language">Optional; forwarded unchanged. NOTE(review): presumably the API request field of the same name - confirm against TranscriptionRequestBody.</param>
/// <returns>The string produced by the awaited <c>TranscribeAsync</c> call.</returns>
/// <exception cref="OperationCanceledException">Cancellation was already requested when the method was called.</exception>
/// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null or empty.</exception>
/// <exception cref="FileNotFoundException">No file exists at <paramref name="filePath"/>.</exception>
118+ public static async UniTask < string > TranscribeFromFileAsync (
119+ string apiKey ,
120+ HttpClient httpClient ,
121+ string filePath ,
122+ Model model ,
123+ CancellationToken cancellationToken ,
124+ string ? prompt = null ,
125+ string ? responseFormat = null ,
126+ float ? temperature = null ,
127+ string ? language = null )
128+ {
// Fail fast: bail out before touching the file system if cancellation was already requested.
129+ cancellationToken . ThrowIfCancellationRequested ( ) ;
130+
// An empty path is reported with the same exception type as a null path.
131+ if ( string . IsNullOrEmpty ( filePath ) )
132+ {
133+ throw new ArgumentNullException ( nameof ( filePath ) ) ;
134+ }
135+
136+ if ( ! File . Exists ( filePath ) )
137+ {
// The single-string constructor takes the exception message, so the path ends up as the message text.
138+ throw new FileNotFoundException ( filePath ) ;
139+ }
140+
// "await using" keeps the stream open until the awaited call below has completed,
// then disposes it asynchronously when the method exits.
141+ await using var stream = File . OpenRead ( filePath ) ;
142+
// Only the file name (not the full path) is passed on as fileName.
// This method does not inspect the audio format itself.
143+ return await TranscribeAsync (
144+ apiKey ,
145+ httpClient ,
146+ fileStream : stream ,
147+ fileName : Path . GetFileName ( filePath ) ,
148+ model ,
149+ cancellationToken ,
150+ prompt ,
151+ responseFormat ,
152+ temperature ,
153+ language ) ;
154+ }
212155 }
213156}
0 commit comments