new models (#25963)

mchenco · kodster28 · web-flow · commit 8653713cdbd8 · 2025-10-21T13:13:23.000-04:00
Co-authored-by: kodster28 <kody@cloudflare.com>
diff --git a/src/assets/images/workers-ai/ibm.svg b/src/assets/images/workers-ai/ibm.svg
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="utf-8"?><!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
+<svg width="800px" height="800px" viewBox="0 -300 1000 1000" xmlns="http://www.w3.org/2000/svg"><path d="M0 0v27.367h194.648V0H0zm222.226 0v27.367h277.383S471.276 0 433.75 0H222.226zm331.797 0v27.367h167.812L711.875 0H554.023zm288.125 0l-9.961 27.367h166.289V0H842.148zM0 53.222v27.367h194.648V53.222H0zm222.226.039V80.59h309.57s-3.615-21.063-9.922-27.329H222.226zm331.797 0V80.59h186.211l-9.219-27.329H554.023zm268.203 0l-9.219 27.329h185.469V53.261h-176.25zM55.937 106.444v27.406h84.297v-27.406H55.937zm222.227 0v27.406h84.297v-27.406h-84.297zm166.289 0v27.406h84.297s5.352-14.473 5.352-27.406h-89.649zm165.508 0v27.406h149.453l-9.961-27.406H609.961zm193.906 0l-10 27.406h150.195v-27.406H803.867zm-747.93 53.262v27.367h84.297v-27.367H55.937zm222.227 0v27.367h215.312s18.012-14.042 23.75-27.367H278.164zm331.797 0v27.367h84.297v-15.234l5.352 15.234h154.414l5.742-15.234v15.234h84.297v-27.367H785.82l-8.398 23.18-8.438-23.18H609.961zM55.937 212.928v27.367h84.297v-27.367H55.937zm222.227 0v27.367h239.062c-5.739-13.281-23.75-27.367-23.75-27.367H278.164zm331.797 0v27.367h84.297v-27.367h-84.297zm99.609 0l10.195 27.367h115.781l9.688-27.367H709.57zm150.195 0v27.367h84.297v-27.367h-84.297zM55.937 266.15v27.366h84.297V266.15H55.937zm222.227 0v27.366h84.297V266.15h-84.297zm166.289 0v27.366h89.648c0-12.915-5.352-27.366-5.352-27.366h-84.296zm165.508 0v27.366h84.297V266.15h-84.297zm118.75 0l9.883 27.366h77.617l9.961-27.366h-97.461zm131.054 0v27.366h84.297V266.15h-84.297zM1.523 319.372v27.406h194.648v-27.406H1.523zm220.703 0v27.406h299.648c6.307-6.275 9.922-27.406 9.922-27.406h-309.57zm333.321 0v27.406h138.711v-27.406H555.547zm192.343 0l10.156 27.406h39.492l9.531-27.406H747.89zm111.875 0v27.406H1000v-27.406H859.765zM1.523 372.633V400h194.648v-27.367H1.523zm220.703 0v27.328H433.75c37.526 0 65.859-27.328 65.859-27.328H222.226zm333.321 0V400h138.711v-27.367H555.547zm211.601 0l9.766 27.29 1.68.038 9.922-27.328h-21.368zm92.617 0V400H1000v-27.367H859.765z" fill="#1f70c1"/></svg>
diff --git a/src/components/models/data.ts b/src/components/models/data.ts
@@ -10,6 +10,7 @@ import qwen from "../../assets/images/workers-ai/qwen.svg";
 import blackforestlabs from "../../assets/images/workers-ai/blackforestlabs.svg";
 import deepgram from "../../assets/images/workers-ai/deepgram.svg";
 import leonardo from "../../assets/images/workers-ai/leonardo.svg";
+import ibm from "../../assets/images/workers-ai/ibm.svg";
 
 export const authorData: Record<string, { name: string; logo: string }> = {
 	openai: {
@@ -64,4 +65,8 @@ export const authorData: Record<string, { name: string; logo: string }> = {
 		name: "Leonardo",
 		logo: leonardo.src,
 	},
+	"ibm-granite": {
+		name: "IBM",
+		logo: ibm.src,
+	},
 };
diff --git a/src/content/docs/workers-ai/platform/pricing.mdx b/src/content/docs/workers-ai/platform/pricing.mdx
@@ -88,6 +88,8 @@ The Price in Tokens column is equivalent to the Price in Neurons column - the di
 | @cf/deepgram/nova-3          | $0.0052 per audio minute input <br/>               | 472.73 neurons per audio minute input <br/>                   |
 | @cf/deepgram/nova-3 (WebSocket)| $0.0092 per audio minute input <br/>               | 836.36 neurons per audio minute input <br/>                   |
 | @cf/pipecat-ai/smart-turn-v2 | $0.00033795 per audio minute input <br/>            | 0.51 neurons per audio minute input <br/>                   |
+| @cf/deepgram/aura-2-en | $0.030 per 1k characters input <br/> | 2727.27 neurons per 1k characters input <br/> |
+| @cf/deepgram/aura-2-es | $0.030 per 1k characters input <br/> | 2727.27 neurons per 1k characters input <br/> |
 
 ## Other model pricing
 
diff --git a/src/content/release-notes/workers-ai.yaml b/src/content/release-notes/workers-ai.yaml
@@ -3,6 +3,11 @@ link: "/workers-ai/changelog/"
 productName: Workers AI
 productLink: "/workers-ai/"
 entries:
+  - publish_date: "2025-10-21"
+    title: New voice and LLM models on Workers AI
+    description: |-
+      - Deepgram Aura 2 brings new text-to-speech capabilities to Workers AI. Refer to [`@cf/deepgram/aura-2-en`](/workers-ai/models/aura-2-en/) and [`@cf/deepgram/aura-2-es`](/workers-ai/models/aura-2-es/) to learn how to use the new models.
+      - The IBM Granite model is also available. This new LLM is small but mighty. Refer to the [`@cf/ibm-granite/granite-4.0-h-micro`](/workers-ai/models/granite-4.0-h-micro/) docs for more information.
   - publish_date: "2025-10-02"
     title: Deepgram Flux now available on Workers AI
     description: |-
diff --git a/src/content/workers-ai-models/aura-2-en.json b/src/content/workers-ai-models/aura-2-en.json
@@ -0,0 +1,134 @@
+{
+    "id": "01564c52-8717-47dc-8efd-907a2ca18301",
+    "source": 1,
+    "name": "@cf/deepgram/aura-2-en",
+    "description": "Aura-2 is a context-aware text-to-speech (TTS) model that applies natural pacing, expressiveness, and fillers based on the context of the provided text. The quality of your text input directly impacts the naturalness of the audio output.",
+    "task": {
+        "id": "b52660a1-9a95-4ab2-8b1d-f232be34604a",
+        "name": "Text-to-Speech",
+        "description": "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages."
+    },
+    "created_at": "2025-10-09 22:19:34.483",
+    "tags": [],
+    "properties": [
+        {
+            "property_id": "async_queue",
+            "value": "true"
+        },
+        {
+            "property_id": "price",
+            "value": [
+                {
+                    "unit": "per 1k chars input",
+                    "price": 0.03,
+                    "currency": "USD"
+                }
+            ]
+        },
+        {
+            "property_id": "partner",
+            "value": "true"
+        },
+        {
+            "property_id": "realtime",
+            "value": "true"
+        }
+    ],
+    "schema": {
+        "input": {
+            "type": "object",
+            "properties": {
+                "speaker": {
+                    "type": "string",
+                    "enum": [
+                        "amalthea",
+                        "andromeda",
+                        "apollo",
+                        "arcas",
+                        "aries",
+                        "asteria",
+                        "athena",
+                        "atlas",
+                        "aurora",
+                        "callista",
+                        "cora",
+                        "cordelia",
+                        "delia",
+                        "draco",
+                        "electra",
+                        "harmonia",
+                        "helena",
+                        "hera",
+                        "hermes",
+                        "hyperion",
+                        "iris",
+                        "janus",
+                        "juno",
+                        "jupiter",
+                        "luna",
+                        "mars",
+                        "minerva",
+                        "neptune",
+                        "odysseus",
+                        "ophelia",
+                        "orion",
+                        "orpheus",
+                        "pandora",
+                        "phoebe",
+                        "pluto",
+                        "saturn",
+                        "thalia",
+                        "theia",
+                        "vesta",
+                        "zeus"
+                    ],
+                    "default": "luna",
+                    "description": "Speaker used to produce the audio."
+                },
+                "encoding": {
+                    "type": "string",
+                    "enum": [
+                        "linear16",
+                        "flac",
+                        "mulaw",
+                        "alaw",
+                        "mp3",
+                        "opus",
+                        "aac"
+                    ],
+                    "description": "Encoding of the output audio."
+                },
+                "container": {
+                    "type": "string",
+                    "enum": [
+                        "none",
+                        "wav",
+                        "ogg"
+                    ],
+                    "description": "Container specifies the file format wrapper for the output audio. The available options depend on the encoding type."
+                },
+                "text": {
+                    "type": "string",
+                    "description": "The text content to be converted to speech"
+                },
+                "sample_rate": {
+                    "type": "number",
+                    "description": "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable."
+                },
+                "bit_rate": {
+                    "type": "number",
+                    "description": "The bitrate of the audio in bits per second. Choose from predefined ranges or specific values based on the encoding type."
+                }
+            },
+            "required": [
+                "text"
+            ]
+        },
+        "output": {
+            "type": "string",
+            "contentType": "audio/mpeg",
+            "format": "binary",
+            "description": "The generated audio in MP3 format"
+        }
+    }
+}
diff --git a/src/content/workers-ai-models/aura-2-es.json b/src/content/workers-ai-models/aura-2-es.json
@@ -0,0 +1,104 @@
+{
+    "id": "c5255b94-2161-4779-bd25-54f061829a2a",
+    "source": 1,
+    "name": "@cf/deepgram/aura-2-es",
+    "description": "Aura-2 is a context-aware text-to-speech (TTS) model that applies natural pacing, expressiveness, and fillers based on the context of the provided text. The quality of your text input directly impacts the naturalness of the audio output.",
+    "task": {
+        "id": "b52660a1-9a95-4ab2-8b1d-f232be34604a",
+        "name": "Text-to-Speech",
+        "description": "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages."
+    },
+    "created_at": "2025-10-09 22:42:37.002",
+    "tags": [],
+    "properties": [
+        {
+            "property_id": "async_queue",
+            "value": "true"
+        },
+        {
+            "property_id": "price",
+            "value": [
+                {
+                    "unit": "per 1k chars input",
+                    "price": 0.03,
+                    "currency": "USD"
+                }
+            ]
+        },
+        {
+            "property_id": "partner",
+            "value": "true"
+        },
+        {
+            "property_id": "realtime",
+            "value": "true"
+        }
+    ],
+    "schema": {
+        "input": {
+            "type": "object",
+            "properties": {
+                "speaker": {
+                    "type": "string",
+                    "enum": [
+                        "sirio",
+                        "nestor",
+                        "carina",
+                        "celeste",
+                        "alvaro",
+                        "diana",
+                        "aquila",
+                        "selena",
+                        "estrella",
+                        "javier"
+                    ],
+                    "default": "aquila",
+                    "description": "Speaker used to produce the audio."
+                },
+                "encoding": {
+                    "type": "string",
+                    "enum": [
+                        "linear16",
+                        "flac",
+                        "mulaw",
+                        "alaw",
+                        "mp3",
+                        "opus",
+                        "aac"
+                    ],
+                    "description": "Encoding of the output audio."
+                },
+                "container": {
+                    "type": "string",
+                    "enum": [
+                        "none",
+                        "wav",
+                        "ogg"
+                    ],
+                    "description": "Container specifies the file format wrapper for the output audio. The available options depend on the encoding type."
+                },
+                "text": {
+                    "type": "string",
+                    "description": "The text content to be converted to speech"
+                },
+                "sample_rate": {
+                    "type": "number",
+                    "description": "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable."
+                },
+                "bit_rate": {
+                    "type": "number",
+                    "description": "The bitrate of the audio in bits per second. Choose from predefined ranges or specific values based on the encoding type."
+                }
+            },
+            "required": [
+                "text"
+            ]
+        },
+        "output": {
+            "type": "string",
+            "contentType": "audio/mpeg",
+            "format": "binary",
+            "description": "The generated audio in MP3 format"
+        }
+    }
+}