Skip to content

Commit 54c299b

Browse files
fixes
1 parent addff6a commit 54c299b

File tree

1 file changed

+81
-69
lines changed

1 file changed

+81
-69
lines changed

src/content/docs/workers-ai/tutorials/build-a-workers-ai-whisper-with-chunking.mdx

Lines changed: 81 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@ You will create a new Worker project using the `create-cloudflare` CLI (C3). [C3
2323

2424
Create a new project named `whisper-tutorial` by running:
2525

26-
<PackageManagers type="create" pkg="cloudflare@latest" args={"whisper-tutorial"} />
26+
<PackageManagers
27+
type="create"
28+
pkg="cloudflare@latest"
29+
args={"whisper-tutorial"}
30+
/>
2731

2832
Running `npm create cloudflare@latest` will prompt you to install the [`create-cloudflare` package](https://www.npmjs.com/package/create-cloudflare), and lead you through setup. C3 will also install [Wrangler](/workers/wrangler/), the Cloudflare Developer Platform CLI.
2933

@@ -82,21 +86,24 @@ compatibility_date = "2024-09-23"
8286

8387
Replace the contents of your `src/index.ts` file with the following integrated code. This sample demonstrates how to:
8488

85-
- Extract an audio file URL from the query parameters.
86-
- Fetch the audio file while explicitly following redirects.
87-
- Split the audio file into smaller chunks (such as, 1MB chunks).
88-
- Transcribe each chunk using the Whisper-large-v3-turbo model via the Cloudflare AI binding.
89-
- Return the aggregated transcription as plain text.
89+
(1) Extract an audio file URL from the query parameters.
9090

91-
```ts
91+
(2) Fetch the audio file while explicitly following redirects.
92+
93+
(3) Split the audio file into smaller chunks (for example, 1 MB chunks).
94+
95+
(4) Transcribe each chunk using the Whisper-large-v3-turbo model via the Cloudflare AI binding.
9296

97+
(5) Return the aggregated transcription as plain text.
98+
99+
```ts
93100
import { Buffer } from "node:buffer";
94101
import type { Ai } from "workers-ai";
95102

96103
export interface Env {
97-
AI: Ai;
98-
// If needed, add your KV namespace for storing transcripts.
99-
// MY_KV_NAMESPACE: KVNamespace;
104+
AI: Ai;
105+
// If needed, add your KV namespace for storing transcripts.
106+
// MY_KV_NAMESPACE: KVNamespace;
100107
}
101108

102109
/**
@@ -107,20 +114,20 @@ export interface Env {
107114
* @returns An array of ArrayBuffers, each representing a chunk of the audio.
108115
*/
109116
async function getAudioChunks(audioUrl: string): Promise<ArrayBuffer[]> {
110-
const response = await fetch(audioUrl, { redirect: "follow" });
111-
if (!response.ok) {
112-
throw new Error(`Failed to fetch audio: ${response.status}`);
113-
}
114-
const arrayBuffer = await response.arrayBuffer();
115-
116-
// Example: Split the audio into 1MB chunks.
117-
const chunkSize = 1024 * 1024; // 1MB
118-
const chunks: ArrayBuffer[] = [];
119-
for (let i = 0; i < arrayBuffer.byteLength; i += chunkSize) {
120-
const chunk = arrayBuffer.slice(i, i + chunkSize);
121-
chunks.push(chunk);
122-
}
123-
return chunks;
117+
const response = await fetch(audioUrl, { redirect: "follow" });
118+
if (!response.ok) {
119+
throw new Error(`Failed to fetch audio: ${response.status}`);
120+
}
121+
const arrayBuffer = await response.arrayBuffer();
122+
123+
// Example: Split the audio into 1MB chunks.
124+
const chunkSize = 1024 * 1024; // 1MB
125+
const chunks: ArrayBuffer[] = [];
126+
for (let i = 0; i < arrayBuffer.byteLength; i += chunkSize) {
127+
const chunk = arrayBuffer.slice(i, i + chunkSize);
128+
chunks.push(chunk);
129+
}
130+
return chunks;
124131
}
125132

126133
/**
@@ -132,56 +139,63 @@ async function getAudioChunks(audioUrl: string): Promise<ArrayBuffer[]> {
132139
* @param env - The Cloudflare Worker environment, including the AI binding.
133140
* @returns The transcription text from the model.
134141
*/
135-
async function transcribeChunk(chunkBuffer: ArrayBuffer, env: Env): Promise<string> {
136-
const base64 = Buffer.from(chunkBuffer, "binary").toString("base64");
137-
const res = await env.AI.run("@cf/openai/whisper-large-v3-turbo", {
138-
audio: base64,
139-
// Optional parameters (uncomment and set if needed):
140-
// task: "transcribe", // or "translate"
141-
// language: "en",
142-
// vad_filter: "false",
143-
// initial_prompt: "Provide context if needed.",
144-
// prefix: "Transcription:",
145-
});
146-
return res.text; // Assumes the transcription result includes a "text" property.
142+
async function transcribeChunk(
143+
chunkBuffer: ArrayBuffer,
144+
env: Env,
145+
): Promise<string> {
146+
const base64 = Buffer.from(chunkBuffer, "binary").toString("base64");
147+
const res = await env.AI.run("@cf/openai/whisper-large-v3-turbo", {
148+
audio: base64,
149+
// Optional parameters (uncomment and set if needed):
150+
// task: "transcribe", // or "translate"
151+
// language: "en",
152+
// vad_filter: "false",
153+
// initial_prompt: "Provide context if needed.",
154+
// prefix: "Transcription:",
155+
});
156+
return res.text; // Assumes the transcription result includes a "text" property.
147157
}
148158

149159
/**
150160
* The main fetch handler. It extracts the 'url' query parameter, fetches the audio,
151161
* processes it in chunks, and returns the full transcription.
152162
*/
153163
export default {
154-
async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> {
155-
// Extract the audio URL from the query parameters.
156-
const { searchParams } = new URL(request.url);
157-
const audioUrl = searchParams.get("url");
158-
159-
if (!audioUrl) {
160-
return new Response("Missing 'url' query parameter", { status: 400 });
161-
}
162-
163-
// Get the audio chunks.
164-
const audioChunks: ArrayBuffer[] = await getAudioChunks(audioUrl);
165-
let fullTranscript = "";
166-
167-
// Process each chunk and build the full transcript.
168-
for (const chunk of audioChunks) {
169-
try {
170-
const transcript = await transcribeChunk(chunk, env);
171-
fullTranscript += transcript + "\n";
172-
} catch (error) {
173-
fullTranscript += "[Error transcribing chunk]\n";
174-
}
175-
}
176-
177-
return new Response(fullTranscript, {
178-
headers: { "Content-Type": "text/plain" },
179-
});
180-
},
164+
async fetch(
165+
request: Request,
166+
env: Env,
167+
ctx: ExecutionContext,
168+
): Promise<Response> {
169+
// Extract the audio URL from the query parameters.
170+
const { searchParams } = new URL(request.url);
171+
const audioUrl = searchParams.get("url");
172+
173+
if (!audioUrl) {
174+
return new Response("Missing 'url' query parameter", { status: 400 });
175+
}
176+
177+
// Get the audio chunks.
178+
const audioChunks: ArrayBuffer[] = await getAudioChunks(audioUrl);
179+
let fullTranscript = "";
180+
181+
// Process each chunk and build the full transcript.
182+
for (const chunk of audioChunks) {
183+
try {
184+
const transcript = await transcribeChunk(chunk, env);
185+
fullTranscript += transcript + "\n";
186+
} catch (error) {
187+
fullTranscript += "[Error transcribing chunk]\n";
188+
}
189+
}
190+
191+
return new Response(fullTranscript, {
192+
headers: { "Content-Type": "text/plain" },
193+
});
194+
},
181195
} satisfies ExportedHandler<Env>;
182196
```
183197

184-
## 5. Develop, test, and deploy
198+
## 5. Deploy your Worker
185199

186200
1. **Run the Worker locally:**
187201

@@ -191,15 +205,13 @@ export default {
191205
npx wrangler dev --remote
192206
```
193207

194-
Open your browser and go to [http://localhost:8787](http://localhost:8787), or use curl:
208+
Open your browser and go to [http://localhost:8787](http://localhost:8787), or use curl:
195209

196210
```sh
197211
curl "http://localhost:8787?url=https://raw.githubusercontent.com/your-username/your-repo/main/your-audio-file.mp3"
198212
```
199213

200-
Replace the URL query parameter with the direct link to your audio file. (For GitHub-hosted files, ensure you use the raw file URL.)
201-
202-
214+
Replace the value of the `url` query parameter with the direct link to your audio file. (For GitHub-hosted files, make sure you use the raw file URL.)
203215

204216
2. **Deploy the Worker:**
205217

@@ -217,7 +229,7 @@ npx wrangler deploy
217229
curl "https://<your-worker-subdomain>.workers.dev?url=https://raw.githubusercontent.com/your-username/your-repo/main/your-audio-file.mp3"
218230
```
219231

220-
Make sure to replace `<your-worker-subdomain>`, `your-username`, `your-repo`, and `your-audio-file.mp3` with your actual details.
232+
Make sure to replace `<your-worker-subdomain>`, `your-username`, `your-repo`, and `your-audio-file.mp3` with your own values.
221233

222234
If successful, the Worker will return a transcript of the audio file:
223235

0 commit comments

Comments
 (0)