@@ -23,7 +23,11 @@ You will create a new Worker project using the `create-cloudflare` CLI (C3). [C3
2323
2424Create a new project named `whisper-tutorial` by running:
2525
26- <PackageManagers type = " create" pkg = " cloudflare@latest" args = { " whisper-tutorial" } />
26+ <PackageManagers
27+ type = " create"
28+ pkg = " cloudflare@latest"
29+ args = { " whisper-tutorial" }
30+ />
2731
2832Running ` npm create cloudflare@latest ` will prompt you to install the [ ` create-cloudflare ` package] ( https://www.npmjs.com/package/create-cloudflare ) , and lead you through setup. C3 will also install [ Wrangler] ( /workers/wrangler/ ) , the Cloudflare Developer Platform CLI.
2933
@@ -82,21 +86,24 @@ compatibility_date = "2024-09-23"
8286
8387Replace the contents of your ` src/index.ts ` file with the following integrated code. This sample demonstrates how to:
8488
85- - Extract an audio file URL from the query parameters.
86- - Fetch the audio file while explicitly following redirects.
87- - Split the audio file into smaller chunks (such as, 1MB chunks).
88- - Transcribe each chunk using the Whisper-large-v3-turbo model via the Cloudflare AI binding.
89- - Return the aggregated transcription as plain text.
89+ (1) Extract an audio file URL from the query parameters.
9090
91- ``` ts
91+ (2) Fetch the audio file while explicitly following redirects.
92+
93+ (3) Split the audio file into smaller chunks (for example, 1 MB chunks).
94+
95+ (4) Transcribe each chunk using the Whisper-large-v3-turbo model via the Cloudflare AI binding.
9296
97+ (5) Return the aggregated transcription as plain text.
98+
99+ ``` ts
93100import { Buffer } from " node:buffer" ;
94101import type { Ai } from " workers-ai" ;
95102
96103export interface Env {
97- AI: Ai ;
98- // If needed, add your KV namespace for storing transcripts.
99- // MY_KV_NAMESPACE: KVNamespace;
104+ AI: Ai ;
105+ // If needed, add your KV namespace for storing transcripts.
106+ // MY_KV_NAMESPACE: KVNamespace;
100107}
101108
102109/**
@@ -107,20 +114,20 @@ export interface Env {
107114 * @returns An array of ArrayBuffers, each representing a chunk of the audio.
108115 */
109116async function getAudioChunks(audioUrl : string ): Promise <ArrayBuffer []> {
110- const response = await fetch (audioUrl , { redirect: " follow" });
111- if (! response .ok ) {
112- throw new Error (` Failed to fetch audio: ${response .status } ` );
113- }
114- const arrayBuffer = await response .arrayBuffer ();
115-
116- // Example: Split the audio into 1MB chunks.
117- const chunkSize = 1024 * 1024 ; // 1MB
118- const chunks: ArrayBuffer [] = [];
119- for (let i = 0 ; i < arrayBuffer .byteLength ; i += chunkSize ) {
120- const chunk = arrayBuffer .slice (i , i + chunkSize );
121- chunks .push (chunk );
122- }
123- return chunks ;
117+ const response = await fetch (audioUrl , { redirect: " follow" });
118+ if (! response .ok ) {
119+ throw new Error (` Failed to fetch audio: ${response .status } ` );
120+ }
121+ const arrayBuffer = await response .arrayBuffer ();
122+
123+ // Example: Split the audio into 1MB chunks.
124+ const chunkSize = 1024 * 1024 ; // 1MB
125+ const chunks: ArrayBuffer [] = [];
126+ for (let i = 0 ; i < arrayBuffer .byteLength ; i += chunkSize ) {
127+ const chunk = arrayBuffer .slice (i , i + chunkSize );
128+ chunks .push (chunk );
129+ }
130+ return chunks ;
124131}
125132
126133/**
@@ -132,56 +139,63 @@ async function getAudioChunks(audioUrl: string): Promise<ArrayBuffer[]> {
132139 * @param env - The Cloudflare Worker environment, including the AI binding.
133140 * @returns The transcription text from the model.
134141 */
135- async function transcribeChunk(chunkBuffer : ArrayBuffer , env : Env ): Promise <string > {
136- const base64 = Buffer .from (chunkBuffer , " binary" ).toString (" base64" );
137- const res = await env .AI .run (" @cf/openai/whisper-large-v3-turbo" , {
138- audio: base64 ,
139- // Optional parameters (uncomment and set if needed):
140- // task: "transcribe", // or "translate"
141- // language: "en",
142- // vad_filter: "false",
143- // initial_prompt: "Provide context if needed.",
144- // prefix: "Transcription:",
145- });
146- return res .text ; // Assumes the transcription result includes a "text" property.
142+ async function transcribeChunk(
143+ chunkBuffer : ArrayBuffer ,
144+ env : Env ,
145+ ): Promise <string > {
146+ const base64 = Buffer .from (chunkBuffer , " binary" ).toString (" base64" );
147+ const res = await env .AI .run (" @cf/openai/whisper-large-v3-turbo" , {
148+ audio: base64 ,
149+ // Optional parameters (uncomment and set if needed):
150+ // task: "transcribe", // or "translate"
151+ // language: "en",
152+ // vad_filter: "false",
153+ // initial_prompt: "Provide context if needed.",
154+ // prefix: "Transcription:",
155+ });
156+ return res .text ; // Assumes the transcription result includes a "text" property.
147157}
148158
149159/**
150160 * The main fetch handler. It extracts the 'url' query parameter, fetches the audio,
151161 * processes it in chunks, and returns the full transcription.
152162 */
153163export default {
154- async fetch(request : Request , env : Env , ctx : ExecutionContext ): Promise <Response > {
155- // Extract the audio URL from the query parameters.
156- const { searchParams } = new URL (request .url );
157- const audioUrl = searchParams .get (" url" );
158-
159- if (! audioUrl ) {
160- return new Response (" Missing 'url' query parameter" , { status: 400 });
161- }
162-
163- // Get the audio chunks.
164- const audioChunks: ArrayBuffer [] = await getAudioChunks (audioUrl );
165- let fullTranscript = " " ;
166-
167- // Process each chunk and build the full transcript.
168- for (const chunk of audioChunks ) {
169- try {
170- const transcript = await transcribeChunk (chunk , env );
171- fullTranscript += transcript + " \n " ;
172- } catch (error ) {
173- fullTranscript += " [Error transcribing chunk]\n " ;
174- }
175- }
176-
177- return new Response (fullTranscript , {
178- headers: { " Content-Type" : " text/plain" },
179- });
180- },
164+ async fetch(
165+ request : Request ,
166+ env : Env ,
167+ ctx : ExecutionContext ,
168+ ): Promise <Response > {
169+ // Extract the audio URL from the query parameters.
170+ const { searchParams } = new URL (request .url );
171+ const audioUrl = searchParams .get (" url" );
172+
173+ if (! audioUrl ) {
174+ return new Response (" Missing 'url' query parameter" , { status: 400 });
175+ }
176+
177+ // Get the audio chunks.
178+ const audioChunks: ArrayBuffer [] = await getAudioChunks (audioUrl );
179+ let fullTranscript = " " ;
180+
181+ // Process each chunk and build the full transcript.
182+ for (const chunk of audioChunks ) {
183+ try {
184+ const transcript = await transcribeChunk (chunk , env );
185+ fullTranscript += transcript + " \n " ;
186+ } catch (error ) {
187+ fullTranscript += " [Error transcribing chunk]\n " ;
188+ }
189+ }
190+
191+ return new Response (fullTranscript , {
192+ headers: { " Content-Type" : " text/plain" },
193+ });
194+ },
181195} satisfies ExportedHandler <Env >;
182196```
183197
184- ## 5. Develop, test, and deploy
198+ ## 5. Develop, test, and deploy your Worker
185199
1862001 . ** Run the Worker locally:**
187201
@@ -191,15 +205,13 @@ export default {
191205npx wrangler dev --remote
192206```
193207
194- Open your browser and go to [ http://localhost:8787 ] ( http://localhost:8787 ) , or use curl:
208+ Open your browser and go to [ http://localhost:8787 ] ( http://localhost:8787 ) , or use curl:
195209
196210``` sh
197211curl " http://localhost:8787?url=https://raw.githubusercontent.com/your-username/your-repo/main/your-audio-file.mp3"
198212```
199213
200- Replace the URL query parameter with the direct link to your audio file. (For GitHub-hosted files, ensure you use the raw file URL.)
201-
202-
214+ Replace the URL query parameter with the direct link to your audio file. (For GitHub-hosted files, ensure you use the raw file URL.)
203215
2042162 . ** Deploy the Worker:**
205217
@@ -217,7 +229,7 @@ npx wrangler deploy
217229curl " https://<your-worker-subdomain>.workers.dev?url=https://raw.githubusercontent.com/your-username/your-repo/main/your-audio-file.mp3"
218230```
219231
220- Make sure to replace ` <your-worker-subdomain> ` , ` your-username ` , ` your-repo ` , and ` your-audio-file.mp3 ` with your actual details.
232+ Make sure to replace ` <your-worker-subdomain> ` , ` your-username ` , ` your-repo ` , and ` your-audio-file.mp3 ` with your actual details.
221233
222234If successful, the Worker will return a transcript of the audio file:
223235
0 commit comments