Skip to content

Commit 3d9fe1f

Browse files
authored
new model (#25595)
* dg * fix test
1 parent fbcffd2 commit 3d9fe1f

File tree

5 files changed

+340
-0
lines changed

5 files changed

+340
-0
lines changed
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
---
2+
import { z } from "astro:schema";
3+
import { Code } from "@astrojs/starlight/components";
4+
import Details from "~/components/Details.astro";
5+
6+
type Props = z.infer<typeof props>;
7+
8+
const props = z.object({
9+
name: z.string(),
10+
lora: z.boolean(),
11+
});
12+
13+
const { name } = props.parse(Astro.props);
14+
15+
const worker = `
16+
export default {
17+
async fetch(request, env, ctx): Promise<Response> {
18+
const resp = await env.AI.run("${name}", {
19+
encoding: "linear16",
20+
sample_rate: "16000"
21+
}, {
22+
websocket: true
23+
});
24+
return resp;
25+
},
26+
} satisfies ExportedHandler<Env>;
27+
`;
28+
29+
const deployWorker = `
30+
npx wrangler deploy
31+
`;
32+
33+
const clientScript = `
34+
const ws = new WebSocket('wss://<your-worker-url.com>');
35+
36+
ws.onopen = () => {
37+
console.log('Connected to WebSocket');
38+
39+
// Generate and send random audio bytes
40+
// You can replace this part with a function
41+
// that reads from your mic or other audio source
42+
const audioData = generateRandomAudio();
43+
ws.send(audioData);
44+
console.log('Audio data sent');
45+
};
46+
47+
ws.onmessage = (event) => {
48+
// Transcription will be received here
49+
// Add your custom logic to parse the data
50+
console.log('Received:', event.data);
51+
};
52+
53+
ws.onerror = (error) => {
54+
console.error('WebSocket error:', error);
55+
};
56+
57+
ws.onclose = () => {
58+
console.log('WebSocket closed');
59+
};
60+
61+
// Generate random audio data (1 second of noise at 44.1kHz, mono)
62+
function generateRandomAudio() {
63+
const sampleRate = 44100;
64+
const duration = 1;
65+
const numSamples = sampleRate * duration;
66+
const buffer = new ArrayBuffer(numSamples * 2);
67+
const view = new Int16Array(buffer);
68+
69+
for (let i = 0; i < numSamples; i++) {
70+
view[i] = Math.floor(Math.random() * 65536 - 32768);
71+
}
72+
73+
return buffer;
74+
}
75+
`;
76+
77+
---
78+
79+
<>
80+
<Details header="Step 1: Create a Worker that establishes a WebSocket connection">
81+
<Code code={worker} lang="ts" />
82+
</Details>
83+
84+
<Details header="Step 2: Deploy your Worker">
85+
<Code code={deployWorker} lang="sh" />
86+
</Details>
87+
88+
<Details header="Step 3: Write a client script to connect to your Worker and send audio">
89+
<Code code={clientScript} lang="js" />
90+
</Details>
91+
</>
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
---
2+
title: New Deepgram Flux model available on Workers AI
3+
description: Partner voice activity detection model
4+
date: 2025-10-02
5+
---
6+
7+
Deepgram's newest Flux model [`@cf/deepgram/flux`](/workers-ai/models/flux/) is now available on Workers AI, hosted directly on Cloudflare's infrastructure. We're excited to be a launch partner with Deepgram and offer their new Speech Recognition model built specifically for enabling voice agents. Check out [Deepgram's blog](https://deepgram.com/flux) for more details on the release.
8+
9+
The Flux model can be used in conjunction with Deepgram's speech-to-text model [`@cf/deepgram/nova-3`](/workers-ai/models/nova-3/) and text-to-speech model [`@cf/deepgram/aura-1`](/workers-ai/models/aura-1/) to build end-to-end voice agents. Having Deepgram on Workers AI takes advantage of our edge GPU infrastructure for ultra-low-latency voice AI applications.
10+
11+
## Promotional Pricing
12+
For the month of October 2025, Deepgram's Flux model will be free to use on Workers AI. Official pricing will be announced soon, and billing will begin after the promotional pricing period ends on October 31, 2025. Check out the [model page](/workers-ai/models/flux/) for pricing details in the future.
13+
14+
15+
## Example Usage
16+
17+
The new Flux model is WebSocket only as it requires live bi-directional streaming in order to recognize speech activity.
18+
19+
1. Create a Worker that establishes a WebSocket connection with `@cf/deepgram/flux`
20+
21+
```ts
22+
export default {
23+
async fetch(request, env, ctx): Promise<Response> {
24+
const resp = await env.AI.run("@cf/deepgram/flux", {
25+
encoding: "linear16",
26+
sample_rate: "16000"
27+
}, {
28+
websocket: true
29+
});
30+
return resp;
31+
},
32+
} satisfies ExportedHandler<Env>;
33+
```
34+
35+
2. Deploy your Worker
36+
```bash
37+
npx wrangler deploy
38+
```
39+
40+
3. Write a client script to connect to your Worker and start sending random audio bytes to it
41+
```js
42+
const ws = new WebSocket('wss://<your-worker-url.com>');
43+
44+
ws.onopen = () => {
45+
console.log('Connected to WebSocket');
46+
47+
// Generate and send random audio bytes
48+
// You can replace this part with a function
49+
// that reads from your mic or other audio source
50+
const audioData = generateRandomAudio();
51+
ws.send(audioData);
52+
console.log('Audio data sent');
53+
};
54+
55+
ws.onmessage = (event) => {
56+
// Transcription will be received here
57+
// Add your custom logic to parse the data
58+
console.log('Received:', event.data);
59+
};
60+
61+
ws.onerror = (error) => {
62+
console.error('WebSocket error:', error);
63+
};
64+
65+
ws.onclose = () => {
66+
console.log('WebSocket closed');
67+
};
68+
69+
// Generate random audio data (1 second of noise at 44.1kHz, mono)
70+
function generateRandomAudio() {
71+
const sampleRate = 44100;
72+
const duration = 1;
73+
const numSamples = sampleRate * duration;
74+
const buffer = new ArrayBuffer(numSamples * 2);
75+
const view = new Int16Array(buffer);
76+
77+
for (let i = 0; i < numSamples; i++) {
78+
view[i] = Math.floor(Math.random() * 65536 - 32768);
79+
}
80+
81+
return buffer;
82+
}
83+
```

src/content/release-notes/workers-ai.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ link: "/workers-ai/changelog/"
33
productName: Workers AI
44
productLink: "/workers-ai/"
55
entries:
6+
- publish_date: "2025-10-02"
7+
title: Deepgram Flux now available on Workers AI
8+
description: |-
9+
- We're excited to be a launch partner with Deepgram and offer their new Speech Recognition model built specifically for enabling voice agents. Check out [Deepgram's blog](https://deepgram.com/flux) for more details on the release.
10+
- Access the model through [`@cf/deepgram/flux`](/workers-ai/models/flux/) and check out the [changelog](/changelog/2025-10-02-deepgram-flux/) for in-depth examples.
611
- publish_date: "2025-09-24"
712
title: New local models available on Workers AI
813
description: |-
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
{
2+
"id": "a2a2afba-b609-4325-8c41-5791ce962239",
3+
"source": 1,
4+
"name": "@cf/deepgram/flux",
5+
"description": "Flux is the first conversational speech recognition model built specifically for voice agents.",
6+
"task": {
7+
"id": "dfce1c48-2a81-462e-a7fd-de97ce985207",
8+
"name": "Automatic Speech Recognition",
9+
"description": "Automatic speech recognition (ASR) models convert a speech signal, typically an audio input, to text."
10+
},
11+
"created_at": "2025-09-29 21:07:55.114",
12+
"tags": [],
13+
"properties": [
14+
{
15+
"property_id": "partner",
16+
"value": "true"
17+
},
18+
{
19+
"property_id": "realtime",
20+
"value": "true"
21+
}
22+
],
23+
"schema": {
24+
"input": {
25+
"type": "object",
26+
"properties": {
27+
"encoding": {
28+
"type": "string",
29+
"description": "Encoding of the audio stream. Currently only supports raw signed little-endian 16-bit PCM.",
30+
"enum": [
31+
"linear16"
32+
]
33+
},
34+
"sample_rate": {
35+
"type": "string",
36+
"description": "Sample rate of the audio stream in Hz.",
37+
"pattern": "^[0-9]+$"
38+
},
39+
"eager_eot_threshold": {
40+
"type": "string",
41+
"description": "End-of-turn confidence required to fire an eager end-of-turn event. When set, enables EagerEndOfTurn and TurnResumed events. Valid Values 0.3 - 0.9."
42+
},
43+
"eot_threshold": {
44+
"type": "string",
45+
"description": "End-of-turn confidence required to finish a turn. Valid Values 0.5 - 0.9.",
46+
"default": "0.7"
47+
},
48+
"eot_timeout_ms": {
49+
"type": "string",
50+
"description": "A turn will be finished when this much time has passed after speech, regardless of EOT confidence.",
51+
"default": "5000",
52+
"pattern": "^[0-9]+$"
53+
},
54+
"keyterm": {
55+
"type": "string",
56+
"description": "Keyterm prompting can improve recognition of specialized terminology. Pass multiple keyterm query parameters to boost multiple keyterms."
57+
},
58+
"mip_opt_out": {
59+
"type": "string",
60+
"description": "Opts out requests from the Deepgram Model Improvement Program. Refer to Deepgram Docs for pricing impacts before setting this to true. https://dpgr.am/deepgram-mip",
61+
"enum": [
62+
"true",
63+
"false"
64+
],
65+
"default": "false"
66+
},
67+
"tag": {
68+
"type": "string",
69+
"description": "Label your requests for the purpose of identification during usage reporting"
70+
}
71+
},
72+
"required": [
73+
"sample_rate",
74+
"encoding"
75+
]
76+
},
77+
"output": {
78+
"type": "object",
79+
"description": "Output will be returned as websocket messages.",
80+
"properties": {
81+
"request_id": {
82+
"type": "string",
83+
"description": "The unique identifier of the request (uuid)"
84+
},
85+
"sequence_id": {
86+
"type": "integer",
87+
"description": "Starts at 0 and increments for each message the server sends to the client.",
88+
"minimum": 0
89+
},
90+
"event": {
91+
"type": "string",
92+
"description": "The type of event being reported.",
93+
"enum": [
94+
"Update",
95+
"StartOfTurn",
96+
"EagerEndOfTurn",
97+
"TurnResumed",
98+
"EndOfTurn"
99+
]
100+
},
101+
"turn_index": {
102+
"type": "integer",
103+
"description": "The index of the current turn",
104+
"minimum": 0
105+
},
106+
"audio_window_start": {
107+
"type": "number",
108+
"description": "Start time in seconds of the audio range that was transcribed"
109+
},
110+
"audio_window_end": {
111+
"type": "number",
112+
"description": "End time in seconds of the audio range that was transcribed"
113+
},
114+
"transcript": {
115+
"type": "string",
116+
"description": "Text that was said over the course of the current turn"
117+
},
118+
"words": {
119+
"type": "array",
120+
"description": "The words in the transcript",
121+
"items": {
122+
"type": "object",
123+
"required": [
124+
"word",
125+
"confidence"
126+
],
127+
"properties": {
128+
"word": {
129+
"type": "string",
130+
"description": "The individual punctuated, properly-cased word from the transcript"
131+
},
132+
"confidence": {
133+
"type": "number",
134+
"description": "Confidence that this word was transcribed correctly"
135+
}
136+
}
137+
}
138+
},
139+
"end_of_turn_confidence": {
140+
"type": "number",
141+
"description": "Confidence that no more speech is coming in this turn"
142+
}
143+
}
144+
}
145+
}
146+
}

src/pages/workers-ai/models/[name].astro

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import { authorData } from "~/components/models/data";
3131
import OpenAIResponsesTextGenerationCode from "~/components/models/code/OpenAIResponsesTextGenerationCode.astro";
3232
import DeepgramAura from "~/components/models/code/DeepgramAura.astro";
3333
import DeepgramNova from "~/components/models/code/DeepgramNova.astro";
34+
import DeepgramFlux from "~/components/models/code/DeepgramFlux.astro";
3435
3536
export const getStaticPaths = (async () => {
3637
const models = await getCollection("workers-ai-models");
@@ -126,6 +127,10 @@ if (model.name === "@cf/deepgram/nova-3") {
126127
CodeExamples = DeepgramNova;
127128
}
128129
130+
if (model.name === "@cf/deepgram/flux") {
131+
CodeExamples = DeepgramFlux;
132+
}
133+
129134
const description = model.description;
130135
131136
const isBeta = model.properties.find(
@@ -221,6 +226,16 @@ const starlightPageProps = {
221226
)
222227
}
223228

229+
{
230+
model.name === "@cf/deepgram/flux" && (
231+
<Aside>
232+
<p>
233+
For the month of October 2025, Deepgram's Flux model will be free to use on Workers AI. Official pricing will be announced soon and charged after the promotional pricing period ends on October 31, 2025.
234+
</p>
235+
</Aside>
236+
)
237+
}
238+
224239
<ModelFeatures model={model} />
225240
{
226241
model.name === "@cf/deepgram/nova-3" && (

0 commit comments

Comments
 (0)