---
updated: 2025-04-03
difficulty: Beginner
pcx_content_type: tutorial
title: Whisper-large-v3-turbo with Cloudflare Workers AI
tags:
  - AI
---

In this tutorial you will learn how to:

- **Transcribe large audio files:** Use the [Whisper-large-v3-turbo](/workers-ai/models/whisper-large-v3-turbo/) model from Cloudflare Workers AI to perform automatic speech recognition (ASR) or translation.
- **Handle large files:** Split large audio files into smaller chunks for processing, which helps overcome memory and execution time limitations.
- **Deploy using Cloudflare Workers:** Create a scalable, low-latency transcription pipeline in a serverless environment.

## 1. Create a new Cloudflare Worker project

import { Render, PackageManagers, WranglerConfig } from "~/components";

<Render file="prereqs" product="workers" />

You will create a new Worker project using the `create-cloudflare` CLI (C3). [C3](https://github.com/cloudflare/workers-sdk/tree/main/packages/create-cloudflare) is a command-line tool designed to help you set up and deploy new applications to Cloudflare.

Create a new project named `whisper-tutorial` by running:

<PackageManagers
	type="create"
	pkg="cloudflare@latest"
	args={"whisper-tutorial"}
/>

Running `npm create cloudflare@latest` will prompt you to install the [`create-cloudflare` package](https://www.npmjs.com/package/create-cloudflare), and lead you through setup. C3 will also install [Wrangler](/workers/wrangler/), the Cloudflare Developer Platform CLI.

<Render
	file="c3-post-run-steps"
	product="workers"
	params={{
		category: "hello-world",
		type: "Worker only",
		lang: "TypeScript",
	}}
/>

This will create a new `whisper-tutorial` directory, which will include:

- A `"Hello World"` [Worker](/workers/get-started/guide/#3-write-code) at `src/index.ts`.
- A [`wrangler.jsonc`](/workers/wrangler/configuration/) configuration file.

Go to your application directory:

```sh
cd whisper-tutorial
```

## 2. Connect your Worker to Workers AI

You must create an AI binding for your Worker to connect to Workers AI. [Bindings](/workers/runtime-apis/bindings/) allow your Workers to interact with resources, like Workers AI, on the Cloudflare Developer Platform.

To bind Workers AI to your Worker, add the following to the end of your Wrangler file:

<WranglerConfig>

```toml
[ai]
binding = "AI"
```

</WranglerConfig>

Your binding is [available in your Worker code](/workers/reference/migrate-to-module-workers/#bindings-in-es-modules-format) on [`env.AI`](/workers/runtime-apis/handlers/fetch/).
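
To verify the binding before building the full pipeline, here is a minimal sketch (not this tutorial's final code) of calling the model through `env.AI`. The Base64 string is a placeholder you would replace with real audio data:

```ts
import type { Ai } from "workers-ai";

interface Env {
	AI: Ai;
}

export default {
	async fetch(request: Request, env: Env): Promise<Response> {
		// env.AI is the binding declared in the Wrangler config above.
		const result = await env.AI.run("@cf/openai/whisper-large-v3-turbo", {
			audio: "<base64-encoded audio>", // placeholder, not real audio
		});
		return Response.json(result);
	},
} satisfies ExportedHandler<Env>;
```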

## 3. Configure Wrangler

In your Wrangler file, add or update the following settings to enable Node.js APIs and polyfills (with a compatibility date of 2024-09-23 or later):

<WranglerConfig>

```toml title="wrangler.toml"
compatibility_flags = [ "nodejs_compat" ]
compatibility_date = "2024-09-23"
```

</WranglerConfig>
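
The `nodejs_compat` flag is what lets the code in the next step import Node.js built-ins such as `node:buffer`. As a quick sanity check, here is a sketch of the Base64 conversion the transcription step relies on:

```ts
// Importing node:buffer works only with the nodejs_compat flag enabled.
import { Buffer } from "node:buffer";

// Base64-encode a few raw bytes, as the transcription step does with
// audio chunks.
const encoded = Buffer.from(new Uint8Array([1, 2, 3])).toString("base64");
console.log(encoded); // "AQID"
```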

## 4. Handle large audio files with chunking

Replace the contents of your `src/index.ts` file with the following integrated code. This sample demonstrates how to:

1. Extract an audio file URL from the query parameters.
2. Fetch the audio file while explicitly following redirects.
3. Split the audio file into smaller chunks (for example, 1 MB chunks).
4. Transcribe each chunk using the Whisper-large-v3-turbo model via the Cloudflare AI binding.
5. Return the aggregated transcription as plain text.

```ts
import { Buffer } from "node:buffer";
import type { Ai } from "workers-ai";

export interface Env {
	AI: Ai;
	// If needed, add your KV namespace for storing transcripts.
	// MY_KV_NAMESPACE: KVNamespace;
}

/**
 * Fetches the audio file from the provided URL and splits it into chunks.
 * This function explicitly follows redirects.
 *
 * @param audioUrl - The URL of the audio file.
 * @returns An array of ArrayBuffers, each representing a chunk of the audio.
 */
async function getAudioChunks(audioUrl: string): Promise<ArrayBuffer[]> {
	const response = await fetch(audioUrl, { redirect: "follow" });
	if (!response.ok) {
		throw new Error(`Failed to fetch audio: ${response.status}`);
	}
	const arrayBuffer = await response.arrayBuffer();

	// Example: Split the audio into 1MB chunks.
	const chunkSize = 1024 * 1024; // 1MB
	const chunks: ArrayBuffer[] = [];
	for (let i = 0; i < arrayBuffer.byteLength; i += chunkSize) {
		const chunk = arrayBuffer.slice(i, i + chunkSize);
		chunks.push(chunk);
	}
	return chunks;
}

/**
 * Transcribes a single audio chunk using the Whisper-large-v3-turbo model.
 * The function converts the audio chunk to a Base64-encoded string and
 * sends it to the model via the AI binding.
 *
 * @param chunkBuffer - The audio chunk as an ArrayBuffer.
 * @param env - The Cloudflare Worker environment, including the AI binding.
 * @returns The transcription text from the model.
 */
async function transcribeChunk(
	chunkBuffer: ArrayBuffer,
	env: Env,
): Promise<string> {
	// Buffer.from(arrayBuffer) copies the bytes directly; no encoding argument applies here.
	const base64 = Buffer.from(chunkBuffer).toString("base64");
	const res = await env.AI.run("@cf/openai/whisper-large-v3-turbo", {
		audio: base64,
		// Optional parameters (uncomment and set if needed):
		// task: "transcribe", // or "translate"
		// language: "en",
		// vad_filter: false,
		// initial_prompt: "Provide context if needed.",
		// prefix: "Transcription:",
	});
	return res.text; // Assumes the transcription result includes a "text" property.
}

/**
 * The main fetch handler. It extracts the 'url' query parameter, fetches the audio,
 * processes it in chunks, and returns the full transcription.
 */
export default {
	async fetch(
		request: Request,
		env: Env,
		ctx: ExecutionContext,
	): Promise<Response> {
		// Extract the audio URL from the query parameters.
		const { searchParams } = new URL(request.url);
		const audioUrl = searchParams.get("url");

		if (!audioUrl) {
			return new Response("Missing 'url' query parameter", { status: 400 });
		}

		// Get the audio chunks.
		const audioChunks: ArrayBuffer[] = await getAudioChunks(audioUrl);
		let fullTranscript = "";

		// Process each chunk and build the full transcript.
		for (const chunk of audioChunks) {
			try {
				const transcript = await transcribeChunk(chunk, env);
				fullTranscript += transcript + "\n";
			} catch (error) {
				fullTranscript += "[Error transcribing chunk]\n";
			}
		}

		return new Response(fullTranscript, {
			headers: { "Content-Type": "text/plain" },
		});
	},
} satisfies ExportedHandler<Env>;
```
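
If you want to persist transcripts rather than only returning them, the commented-out `MY_KV_NAMESPACE` binding in the `Env` interface above hints at one option. A minimal sketch, assuming you uncomment that binding and add a matching KV namespace to your Wrangler config (the helper name is illustrative):

```ts
// Hypothetical helper: requires MY_KV_NAMESPACE to be uncommented in Env
// and bound to a KV namespace in your Wrangler config.
async function storeTranscript(
	env: Env,
	audioUrl: string,
	transcript: string,
): Promise<void> {
	// Key by audio URL so repeat requests could be served from KV
	// instead of re-running the model.
	await env.MY_KV_NAMESPACE.put(audioUrl, transcript);
}
```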

## 5. Deploy your Worker

1. **Run the Worker locally:**

   Use Wrangler's development mode to test your Worker locally:

```sh
npx wrangler dev
```

Open your browser and go to [http://localhost:8787](http://localhost:8787), or use curl:

```sh
curl "http://localhost:8787?url=https://raw.githubusercontent.com/your-username/your-repo/main/your-audio-file.mp3"
```

Replace the URL query parameter with the direct link to your audio file. (For GitHub-hosted files, ensure you use the raw file URL.)

2. **Deploy the Worker:**

   Once testing is complete, deploy your Worker with:

```sh
npx wrangler deploy
```

3. **Test the deployed Worker:**

   After deployment, test your Worker by passing the audio URL as a query parameter:

```sh
curl "https://<your-worker-subdomain>.workers.dev?url=https://raw.githubusercontent.com/your-username/your-repo/main/your-audio-file.mp3"
```

Make sure to replace `<your-worker-subdomain>`, `your-username`, `your-repo`, and `your-audio-file.mp3` with your actual details.

If successful, the Worker will return a transcript of the audio file:

```txt
This is the transcript of the audio...
```