Skip to content

Commit 40a48a6

Browse files
mchenco and craigsdennis
authored and committed
pinning and pricing (#24187)
* pinning and pricing * Adds Code Samples for Responses API * Adds the /v1/responses to the note * Adds Responses example --------- Co-authored-by: Craig Dennis <[email protected]>
1 parent 6007b02 commit 40a48a6

File tree

5 files changed

+119
-18
lines changed

5 files changed

+119
-18
lines changed

src/components/ModelCatalog.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ const ModelCatalog = ({ models }: { models: WorkersAIModelsSchema[] }) => {
2222

2323
// List of model names to pin at the top
2424
const pinnedModelNames = [
25+
"@cf/openai/gpt-oss-120b",
26+
"@cf/openai/gpt-oss-20b",
2527
"@cf/meta/llama-4-scout-17b-16e-instruct",
2628
"@cf/meta/llama-3.3-70b-instruct-fp8-fast",
2729
"@cf/meta/llama-3.1-8b-instruct-fast",
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
---
2+
import { z } from "astro:schema";
3+
import { Aside, Code } from "@astrojs/starlight/components";
4+
import Details from "~/components/Details.astro";
5+
6+
type Props = z.infer<typeof props>;
7+
8+
const props = z.object({
9+
name: z.string(),
10+
lora: z.boolean(),
11+
});
12+
13+
const { name } = props.parse(Astro.props);
14+
15+
const worker = `
16+
export default {
17+
async fetch(request, env): Promise<Response> {
18+
const response = await env.AI.run('${name}', {
19+
instructions: 'You are concise.',
20+
input: 'What is the origin of the phrase Hello, World?',
21+
});
22+
23+
return Response.json(response);
24+
},
25+
} satisfies ExportedHandler<Env>;
26+
`;
27+
28+
const python = `
29+
import os
30+
import requests
31+
32+
ACCOUNT_ID = os.environ.get("CLOUDFLARE_ACCOUNT_ID")
33+
AUTH_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")
34+
35+
prompt = "Tell me all about PEP-8"
36+
response = requests.post(
37+
f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/v1/responses",
38+
headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
39+
json={
40+
"model": "${name}",
41+
"input": prompt
42+
}
43+
)
44+
result = response.json()
45+
print(result)
46+
`;
47+
48+
const curl = `
49+
curl https://api.cloudflare.com/client/v4/accounts/$CLOUDFLARE_ACCOUNT_ID/ai/v1/responses \
50+
-H "Content-Type: application/json" \
51+
-H "Authorization: Bearer $CLOUDFLARE_AUTH_TOKEN" \
52+
-d '{
53+
"model": "${name}",
54+
"input": "What are the benefits of open-source models?"
55+
}'
56+
`;
57+
---
58+
59+
<>
60+
<Details header="Worker">
61+
<Code code={worker} lang="ts" />
62+
</Details>
63+
64+
<Details header="Python">
65+
<Code code={python} lang="py" />
66+
</Details>
67+
68+
<Details header="curl">
69+
<Code code={curl} lang="sh" />
70+
</Details>
71+
72+
<Aside type="note" title="OpenAI compatible endpoints">
73+
Workers AI also supports OpenAI compatible API endpoints for{" "}
74+
<code>/v1/chat/completions</code>, <code>/v1/responses</code>, and <code
75+
>/v1/embeddings</code
76+
>. For more details, refer to{" "}
77+
<a href="/workers-ai/configuration/open-ai-compatibility/">
78+
Configurations
79+
</a>
80+
.
81+
</Aside>
82+
</>
83+
)
84+

src/content/docs/workers-ai/configuration/open-ai-compatibility.mdx

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,11 @@ pcx_content_type: configuration
33
title: OpenAI compatible API endpoints
44
sidebar:
55
order: 3
6-
76
---
87

9-
import { Render } from "~/components"
8+
import { Render } from "~/components";
109

11-
<Render file="openai-compatibility" /> <br/>
10+
<Render file="openai-compatibility" /> <br />
1211

1312
## Usage
1413

@@ -22,20 +21,26 @@ With OpenAI compatible endpoints, you can leverage the [openai-node sdk](https://
2221
import OpenAI from "openai";
2322

2423
const openai = new OpenAI({
25-
apiKey: env.CLOUDFLARE_API_KEY,
26-
baseURL: `https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`
27-
});
24+
apiKey: env.CLOUDFLARE_API_KEY,
25+
baseURL: `https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`,
26+
});
2827

28+
// Use chat completions
2929
const chatCompletion = await openai.chat.completions.create({
30-
messages: [{ role: "user", content: "Make some robot noises" }],
31-
model: "@cf/meta/llama-3.1-8b-instruct",
32-
});
30+
messages: [{ role: "user", content: "Make some robot noises" }],
31+
model: "@cf/meta/llama-3.1-8b-instruct",
32+
});
3333

34-
const embeddings = await openai.embeddings.create({
35-
model: "@cf/baai/bge-large-en-v1.5",
36-
input: "I love matcha"
37-
});
34+
// Use responses
35+
const response = await openai.responses.create({
36+
model: "@cf/openai/gpt-oss-120b",
37+
input: "Talk to me about open source",
38+
});
3839

40+
const embeddings = await openai.embeddings.create({
41+
model: "@cf/baai/bge-large-en-v1.5",
42+
input: "I love matcha",
43+
});
3944
```
4045

4146
```bash title="cURL example"

src/content/docs/workers-ai/platform/pricing.mdx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ The Price in Tokens column is equivalent to the Price in Neurons column - the di
5454
| @cf/google/gemma-3-12b-it | $0.345 per M input tokens <br/> $0.556 per M output tokens | 31371 neurons per M input tokens <br/> 50560 neurons per M output tokens |
5555
| @cf/qwen/qwq-32b | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
5656
| @cf/qwen/qwen2.5-coder-32b-instruct | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
57+
| @cf/openai/gpt-oss-120b | $0.350 per M input tokens <br/> $0.750 per M output tokens | 31818 neurons per M input tokens <br/> 68182 neurons per M output tokens |
58+
| @cf/openai/gpt-oss-20b | $0.200 per M input tokens <br/> $0.300 per M output tokens | 18182 neurons per M input tokens <br/> 27273 neurons per M output tokens |
5759

5860
## Embeddings model pricing
5961

src/pages/workers-ai/models/[name].astro

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import LlamaGuard from "~/components/models/code/LlamaGuard.astro";
2828
import BgeRerankerBase from "~/components/models/code/Bge-Reranker-Base.astro";
2929
3030
import { authorData } from "~/components/models/data";
31+
import OpenAIResponsesTextGenerationCode from "~/components/models/code/OpenAIResponsesTextGenerationCode.astro";
3132
3233
export const getStaticPaths = (async () => {
3334
const models = await getCollection("workers-ai-models");
@@ -108,6 +109,13 @@ if (model.name === "@cf/baai/bge-reranker-base") {
108109
CodeExamples = BgeRerankerBase;
109110
}
110111
112+
if (
113+
model.name === "@cf/openai/gpt-oss-120b" ||
114+
model.name === "@cf/openai/gpt-oss-20b"
115+
) {
116+
CodeExamples = OpenAIResponsesTextGenerationCode;
117+
}
118+
111119
const description = model.description;
112120
113121
const isBeta = model.properties.find(
@@ -148,26 +156,26 @@ const starlightPageProps = {
148156
alt={`${author.name} logo`}
149157
/>
150158
) : (
151-
<div class="mr-4 flex h-12 w-12 items-center justify-center rounded-md bg-gray-100 text-2xl font-black uppercase text-gray-400">
159+
<div class="mr-4 flex h-12 w-12 items-center justify-center rounded-md bg-gray-100 text-2xl font-black text-gray-400 uppercase">
152160
{model.name.split("/")[1].substring(0, 1)}
153161
</div>
154162
)
155163
}
156164
<div>
157165
<h1
158166
id="_top"
159-
class="-mt-4! mb-0! flex items-center text-4xl! font-bold! leading-none!"
167+
class="-mt-4! mb-0! flex items-center text-4xl! leading-none! font-bold!"
160168
>
161169
{name}
162-
{isBeta && <Badge text="Beta" variant="caution" class="ml-3 mt-2" />}
170+
{isBeta && <Badge text="Beta" variant="caution" class="mt-2 ml-3" />}
163171
</h1>
164172
<span class="-mt-1 block"><ModelInfo model={model} /></span>
165173
</div>
166174
</div>
167175

168176
<span class="mt-4 block font-mono text-sm text-gray-400">{model.name}</span>
169177

170-
<p class="mb-2! mt-3">{description}</p>
178+
<p class="mt-3 mb-2!">{description}</p>
171179

172180
{
173181
model.name === "@cf/meta/llama-3.2-11b-vision-instruct" && (
@@ -256,4 +264,4 @@ const starlightPageProps = {
256264
<Code code={JSON.stringify(model.schema.output, null, 4)} lang="json" />
257265
</TabItem>
258266
</Tabs>
259-
</StarlightPage>
267+
</StarlightPage>

0 commit comments

Comments
 (0)