Skip to content

Commit 8653713

Browse files
mchenco and kodster28
authored
new models (#25963)
Co-authored-by: kodster28 <[email protected]>
1 parent 54c51da commit 8653713

File tree

6 files changed

+252
-0
lines changed

6 files changed

+252
-0
lines changed
Lines changed: 2 additions & 0 deletions
Loading

src/components/models/data.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import qwen from "../../assets/images/workers-ai/qwen.svg";
1010
import blackforestlabs from "../../assets/images/workers-ai/blackforestlabs.svg";
1111
import deepgram from "../../assets/images/workers-ai/deepgram.svg";
1212
import leonardo from "../../assets/images/workers-ai/leonardo.svg";
13+
import ibm from "../../assets/images/workers-ai/ibm.svg";
1314

1415
export const authorData: Record<string, { name: string; logo: string }> = {
1516
openai: {
@@ -64,4 +65,8 @@ export const authorData: Record<string, { name: string; logo: string }> = {
6465
name: "Leonardo",
6566
logo: leonardo.src,
6667
},
68+
"ibm-granite": {
69+
name: "IBM",
70+
logo: ibm.src,
71+
},
6772
};

src/content/docs/workers-ai/platform/pricing.mdx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ The Price in Tokens column is equivalent to the Price in Neurons column - the di
8888
| @cf/deepgram/nova-3 | $0.0052 per audio minute input <br/> | 472.73 neurons per audio minute input <br/> |
8989
| @cf/deepgram/nova-3 (WebSocket)| $0.0092 per audio minute input <br/> | 836.36 neurons per audio minute input <br/> |
9090
| @cf/pipecat-ai/smart-turn-v2 | $0.00033795 per audio minute input <br/> | 0.51 neurons per audio minute input <br/> |
91+
| @cf/deepgram/aura-2-en | $0.030 per 1k characters input <br/> | 2727.27 neurons per 1k characters input <br/> |
92+
| @cf/deepgram/aura-2-es | $0.030 per 1k characters input <br/> | 2727.27 neurons per 1k characters input <br/> |
9193

9294
## Other model pricing
9395

src/content/release-notes/workers-ai.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ link: "/workers-ai/changelog/"
33
productName: Workers AI
44
productLink: "/workers-ai/"
55
entries:
6+
- publish_date: "2025-10-21"
7+
title: New voice and LLM models on Workers AI
8+
description: |-
9+
- Deepgram Aura 2 brings new text-to-speech capabilities to Workers AI. Check out [`@cf/deepgram/aura-2-en`](/workers-ai/models/aura-2-en/) and [`@cf/deepgram/aura-2-es`](/workers-ai/models/aura-2-es/) to learn how to use the new models.
10+
- The IBM Granite model is also available! This new LLM is small but mighty. Take a look at the docs to learn more: [`@cf/ibm-granite/granite-4.0-h-micro`](/workers-ai/models/granite-4.0-h-micro/).
611
- publish_date: "2025-10-02"
712
title: Deepgram Flux now available on Workers AI
813
description: |-
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
{
2+
"id": "01564c52-8717-47dc-8efd-907a2ca18301",
3+
"source": 1,
4+
"name": "@cf/deepgram/aura-2-en",
5+
"description": "Aura-2 is a context-aware text-to-speech (TTS) model that applies natural pacing, expressiveness, and fillers based on the context of the provided text. The quality of your text input directly impacts the naturalness of the audio output.",
6+
"task": {
7+
"id": "b52660a1-9a95-4ab2-8b1d-f232be34604a",
8+
"name": "Text-to-Speech",
9+
"description": "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages."
10+
},
11+
"created_at": "2025-10-09 22:19:34.483",
12+
"tags": [],
13+
"properties": [
14+
{
15+
"property_id": "async_queue",
16+
"value": "true"
17+
},
18+
{
19+
"property_id": "price",
20+
"value": [
21+
{
22+
"unit": "per 1k chars input",
23+
"price": 0.03,
24+
"currency": "USD"
25+
}
26+
]
27+
},
28+
{
29+
"property_id": "partner",
30+
"value": "true"
31+
},
32+
{
33+
"property_id": "realtime",
34+
"value": "true"
35+
}
36+
],
37+
"schema": {
38+
"input": {
39+
"type": "object",
40+
"properties": {
41+
"speaker": {
42+
"type": "string",
43+
"enum": [
44+
"amalthea",
45+
"andromeda",
46+
"apollo",
47+
"arcas",
48+
"aries",
49+
"asteria",
50+
"athena",
51+
"atlas",
52+
"aurora",
53+
"callista",
54+
"cora",
55+
"cordelia",
56+
"delia",
57+
"draco",
58+
"electra",
59+
"harmonia",
60+
"helena",
61+
"hera",
62+
"hermes",
63+
"hyperion",
64+
"iris",
65+
"janus",
66+
"juno",
67+
"jupiter",
68+
"luna",
69+
"mars",
70+
"minerva",
71+
"neptune",
72+
"odysseus",
73+
"ophelia",
74+
"orion",
75+
"orpheus",
76+
"pandora",
77+
"phoebe",
78+
"pluto",
79+
"saturn",
80+
"thalia",
81+
"theia",
82+
"vesta",
83+
"zeus"
84+
],
85+
"default": "luna",
86+
"description": "Speaker used to produce the audio."
87+
},
88+
"encoding": {
89+
"type": "string",
90+
"enum": [
91+
"linear16",
92+
"flac",
93+
"mulaw",
94+
"alaw",
95+
"mp3",
96+
"opus",
97+
"aac"
98+
],
99+
"description": "Encoding of the output audio."
100+
},
101+
"container": {
102+
"type": "string",
103+
"enum": [
104+
"none",
105+
"wav",
106+
"ogg"
107+
],
108+
"description": "Container specifies the file format wrapper for the output audio. The available options depend on the encoding type.."
109+
},
110+
"text": {
111+
"type": "string",
112+
"description": "The text content to be converted to speech"
113+
},
114+
"sample_rate": {
115+
"type": "number",
116+
"description": "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
117+
},
118+
"bit_rate": {
119+
"type": "number",
120+
"description": "The bitrate of the audio in bits per second. Choose from predefined ranges or specific values based on the encoding type."
121+
}
122+
},
123+
"required": [
124+
"text"
125+
]
126+
},
127+
"output": {
128+
"type": "string",
129+
"contentType": "audio/mpeg",
130+
"format": "binary",
131+
"description": "The generated audio in MP3 format"
132+
}
133+
}
134+
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
{
2+
"id": "c5255b94-2161-4779-bd25-54f061829a2a",
3+
"source": 1,
4+
"name": "@cf/deepgram/aura-2-es",
5+
"description": "Aura-2 is a context-aware text-to-speech (TTS) model that applies natural pacing, expressiveness, and fillers based on the context of the provided text. The quality of your text input directly impacts the naturalness of the audio output.",
6+
"task": {
7+
"id": "b52660a1-9a95-4ab2-8b1d-f232be34604a",
8+
"name": "Text-to-Speech",
9+
"description": "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages."
10+
},
11+
"created_at": "2025-10-09 22:42:37.002",
12+
"tags": [],
13+
"properties": [
14+
{
15+
"property_id": "async_queue",
16+
"value": "true"
17+
},
18+
{
19+
"property_id": "price",
20+
"value": [
21+
{
22+
"unit": "per 1k chars input",
23+
"price": 0.03,
24+
"currency": "USD"
25+
}
26+
]
27+
},
28+
{
29+
"property_id": "partner",
30+
"value": "true"
31+
},
32+
{
33+
"property_id": "realtime",
34+
"value": "true"
35+
}
36+
],
37+
"schema": {
38+
"input": {
39+
"type": "object",
40+
"properties": {
41+
"speaker": {
42+
"type": "string",
43+
"enum": [
44+
"sirio",
45+
"nestor",
46+
"carina",
47+
"celeste",
48+
"alvaro",
49+
"diana",
50+
"aquila",
51+
"selena",
52+
"estrella",
53+
"javier"
54+
],
55+
"default": "aquila",
56+
"description": "Speaker used to produce the audio."
57+
},
58+
"encoding": {
59+
"type": "string",
60+
"enum": [
61+
"linear16",
62+
"flac",
63+
"mulaw",
64+
"alaw",
65+
"mp3",
66+
"opus",
67+
"aac"
68+
],
69+
"description": "Encoding of the output audio."
70+
},
71+
"container": {
72+
"type": "string",
73+
"enum": [
74+
"none",
75+
"wav",
76+
"ogg"
77+
],
78+
"description": "Container specifies the file format wrapper for the output audio. The available options depend on the encoding type.."
79+
},
80+
"text": {
81+
"type": "string",
82+
"description": "The text content to be converted to speech"
83+
},
84+
"sample_rate": {
85+
"type": "number",
86+
"description": "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
87+
},
88+
"bit_rate": {
89+
"type": "number",
90+
"description": "The bitrate of the audio in bits per second. Choose from predefined ranges or specific values based on the encoding type."
91+
}
92+
},
93+
"required": [
94+
"text"
95+
]
96+
},
97+
"output": {
98+
"type": "string",
99+
"contentType": "audio/mpeg",
100+
"format": "binary",
101+
"description": "The generated audio in MP3 format"
102+
}
103+
}
104+
}

0 commit comments

Comments
 (0)