| 
1 | 1 | # Whisper-API-unity  | 
2 |  | -Binds [Whisper transcription and translation API](https://platform.openai.com/docs/api-reference/audio) to pure C# on Unity.  | 
 | 2 | +A client library for the OpenAI [Whisper transcription and translation API](https://platform.openai.com/docs/api-reference/audio) for Unity.  | 
3 | 3 | 
 
  | 
4 | 4 | See also the [official documentation](https://platform.openai.com/docs/guides/speech-to-text).  | 
5 | 5 | 
 
  | 
6 |  | -## How to import by UnityPackageManager  | 
 | 6 | +## Features  | 
7 | 7 | 
 
  | 
8 |  | -Add dependencies:  | 
 | 8 | +- Transcription  | 
 | 9 | +  - Speech audio file to text in the spoken language.   | 
 | 10 | +- Translation  | 
 | 11 | +  - Speech audio file to text in English.  | 
 | 12 | + | 
 | 13 | +## How to import by Unity Package Manager  | 
 | 14 | + | 
 | 15 | +Add the following dependencies to your `/Packages/manifest.json`.  | 
9 | 16 | 
 
  | 
10 | 17 | ```json  | 
11 | 18 | {  | 
12 | 19 |   "dependencies": {  | 
13 |  | -    "com.mochineko.whisper-api": "https://github.com/mochi-neko/Whisper-API-unity.git?path=/Assets/Mochineko/Whisper_API#0.1.0",  | 
14 |  | -    "com.unity.nuget.newtonsoft-json": "3.0.2",  | 
 | 20 | +    "com.mochineko.whisper-api": "https://github.com/mochi-neko/Whisper-API-unity.git?path=/Assets/Mochineko/Whisper_API#1.0.0",  | 
15 | 21 |     ...  | 
16 | 22 |   }  | 
17 | 23 | }  | 
18 | 24 | ```  | 
19 | 25 | 
 
  | 
20 |  | -to your `manifest.json`.  | 
21 |  | - | 
22 |  | -If you have already used Newtonsoft.Json on your project, remove dependency:`"com.unity.nuget.newtonsoft-json": "3.0.2",`.  | 
23 |  | - | 
24 |  | -## How to transcribe speech audio into text with the Whisper API  | 
25 |  | - | 
26 |  | -1. Generate an API key on [OpenAI](https://platform.openai.com/account/api-keys). (Take care of your API key: it is secret information, so you should not make it public.)  | 
27 |  | -2. Create an instance of `WhisperTranscriptionConnection` with API key.  | 
28 |  | -3. Set file path of speech audio e.g. `/some/path/of/file/audio.mp3`, and call `WhisperTranscriptionConnection.TranscribeFromFileAsync`.  | 
29 |  | -4. Because the default response text format is JSON, you can extract the text with `APIResponseBody.FromJson(json)?.Text`.  | 
 | 26 | +## How to use  | 
30 | 27 | 
 
  | 
31 |  | -An essential sample code of transcription with [UniTask](https://github.com/Cysharp/UniTask) is as follows:  | 
32 |  | - | 
33 |  | -```cs  | 
34 |  | -#nullable enable  | 
35 |  | -using System;  | 
36 |  | -using Cysharp.Threading.Tasks;  | 
37 |  | -using UnityEngine;  | 
38 |  | -using UnityEngine.Assertions;  | 
39 |  | -using Mochineko.Whisper_API.;  | 
40 |  | -using Mochineko.Whisper_API.Transcription;  | 
41 |  | - | 
42 |  | -namespace XXX  | 
43 |  | -{  | 
44 |  | -    /// <summary>  | 
45 |  | -    /// A sample component to transcribe speech by Whisper API on Unity.  | 
46 |  | -    /// </summary>  | 
47 |  | -    public sealed class TranscriptionSample : MonoBehaviour  | 
48 |  | -    {  | 
49 |  | -        /// <summary>  | 
50 |  | -        /// API key generated by OpenAI.  | 
51 |  | -        /// </summary>  | 
52 |  | -        [SerializeField] private string apiKey = string.Empty;  | 
53 |  | - | 
54 |  | -        /// <summary>  | 
55 |  | -        /// File path of speech audio.  | 
56 |  | -        /// </summary>  | 
57 |  | -        [SerializeField] private string filePath = string.Empty;  | 
58 |  | - | 
59 |  | -        private WhisperTranscriptionConnection? connection;  | 
60 |  | - | 
61 |  | -        private void Start()  | 
62 |  | -        {  | 
63 |  | -            // Create instance of WhisperTranscriptionConnection.  | 
64 |  | -            connection = new WhisperTranscriptionConnection(apiKey);  | 
65 |  | -              | 
66 |  | -            // If you want to specify response format, language, etc..., please use other initialization:  | 
67 |  | -            // connection = new WhisperTranscriptionConnection(apiKey, new APIRequestBody(  | 
68 |  | -            //     file: "",  | 
69 |  | -            //     model: "whisper-1",  | 
70 |  | -            //     prompt: "Some prompts",  | 
71 |  | -            //     responseFormat: "json",  | 
72 |  | -            //     temperature: 1f,  | 
73 |  | -            //     language: "ja"));  | 
74 |  | -        }  | 
75 |  | - | 
76 |  | -        [ContextMenu(nameof(Transcribe))]  | 
77 |  | -        public async void Transcribe()  | 
78 |  | -        {  | 
79 |  | -            string result;  | 
80 |  | -            try  | 
81 |  | -            {  | 
82 |  | -                // Transcribe speech by Whisper speech to text API.  | 
83 |  | -                result = await connection  | 
84 |  | -                    .TranscribeFromFileAsync(filePath, this.GetCancellationTokenOnDestroy());  | 
85 |  | -            }  | 
86 |  | -            catch (Exception e)  | 
87 |  | -            {  | 
88 |  | -                // Exceptions should be caught.  | 
89 |  | -                Debug.LogException(e);  | 
90 |  | -                return;  | 
91 |  | -            }  | 
92 |  | - | 
93 |  | -            // Default text response format is JSON.  | 
94 |  | -            var text = APIResponseBody.FromJson(result)?.Text;  | 
95 |  | - | 
96 |  | -            // Log text result.  | 
97 |  | -            Debug.Log($"[Whisper_API.Transcription.Samples] Result:\n{text}");  | 
98 |  | -        }  | 
99 |  | -    }  | 
100 |  | -}  | 
101 |  | -```  | 
 | 28 | +Please generate your API key on [OpenAI](https://platform.openai.com/account/api-keys).  | 
102 | 29 | 
 
  | 
103 |  | -See also [Sample](https://github.com/mochi-neko/Whisper-API-unity/blob/main/Assets/Mochineko/Whisper_API.Samples/Transcription/TranscriptionSample.cs).  | 
 | 30 | +See the [sample code](./Assets/Mochineko/WhisperAPI.Samples).  | 
104 | 31 | 
 
  | 
105 |  | -Translation (translate speech audio into English text) sample is [here](https://github.com/mochi-neko/Whisper-API-unity/blob/main/Assets/Mochineko/Whisper_API.Samples/Transcription/TranscriptionSample.cs).  | 
 | 32 | +You can customize the handling of retries to fit the needs of your project with [Relent](https://github.com/mochi-neko/Relent),  | 
 | 33 | +e.g. [PolicyFactory](./Assets/Mochineko/WhisperAPI.Samples/PolicyFactory.cs).  | 
106 | 34 | 
 
  | 
107 | 35 | ## Changelog  | 
108 | 36 | 
 
  | 
109 |  | -See [CHANGELOG](https://github.com/mochi-neko/Whisper-API-unity/blob/main/CHANGELOG.md)  | 
 | 37 | +See [CHANGELOG](./CHANGELOG.md).  | 
110 | 38 | 
 
  | 
111 |  | -## 3rd Party Notices  | 
 | 39 | +## 3rd party notices  | 
112 | 40 | 
 
  | 
113 |  | -See [NOTICE](https://github.com/mochi-neko/Whisper-API-unity/blob/main/NOTICE.md).  | 
 | 41 | +See [NOTICE](./NOTICE.md).  | 
114 | 42 | 
 
  | 
115 | 43 | ## License  | 
116 | 44 | 
 
  | 
117 |  | -[MIT License](https://github.com/mochi-neko/Whisper-API-unity/blob/main/LICENSE)  | 
 | 45 | +Licensed under the [MIT](./LICENSE) License.  | 
0 commit comments