Skip to content

Commit 40a48a6

Browse files
mchenco and craigsdennis
authored and committed
pinning and pricing (#24187)
* pinning and pricing * Adds Code Samples for Responses API * Adds the /v1/responses to the note * Adds Responses example --------- Co-authored-by: Craig Dennis <[email protected]>
1 parent 6007b02 commit 40a48a6

File tree

5 files changed

+119
-18
lines changed

5 files changed

+119
-18
lines changed

src/components/ModelCatalog.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ const ModelCatalog = ({ models }: { models: WorkersAIModelsSchema[] }) => {
2222

2323
// List of model names to pin at the top
2424
const pinnedModelNames = [
25+
"@cf/openai/gpt-oss-120b",
26+
"@cf/openai/gpt-oss-20b",
2527
"@cf/meta/llama-4-scout-17b-16e-instruct",
2628
"@cf/meta/llama-3.3-70b-instruct-fp8-fast",
2729
"@cf/meta/llama-3.1-8b-instruct-fast",
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
---
2+
import { z } from "astro:schema";
3+
import { Aside, Code } from "@astrojs/starlight/components";
4+
import Details from "~/components/Details.astro";
5+
6+
type Props = z.infer<typeof props>;
7+
8+
const props = z.object({
9+
name: z.string(),
10+
lora: z.boolean(),
11+
});
12+
13+
const { name } = props.parse(Astro.props);
14+
15+
const worker = `
16+
export default {
17+
async fetch(request, env): Promise<Response> {
18+
const response = await env.AI.run('${name}', {
19+
instructions: 'You are concise.',
20+
input: 'What is the origin of the phrase Hello, World?',
21+
});
22+
23+
return Response.json(response);
24+
},
25+
} satisfies ExportedHandler<Env>;
26+
`;
27+
28+
const python = `
29+
import os
30+
import requests
31+
32+
ACCOUNT_ID = os.environ.get("CLOUDFLARE_ACCOUNT_ID")
33+
AUTH_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")
34+
35+
prompt = "Tell me all about PEP-8"
36+
response = requests.post(
37+
f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/v1/responses",
38+
headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
39+
json={
40+
"model": "${name}",
41+
"input": prompt
42+
}
43+
)
44+
result = response.json()
45+
print(result)
46+
`;
47+
48+
const curl = `
49+
curl https://api.cloudflare.com/client/v4/accounts/$CLOUDFLARE_ACCOUNT_ID/ai/v1/responses \
50+
-H "Content-Type: application/json" \
51+
-H "Authorization: Bearer $CLOUDFLARE_AUTH_TOKEN" \
52+
-d '{
53+
"model": "${name}",
54+
"input": "What are the benefits of open-source models?"
55+
}'
56+
`;
57+
---
58+
59+
<>
60+
<Details header="Worker">
61+
<Code code={worker} lang="ts" />
62+
</Details>
63+
64+
<Details header="Python">
65+
<Code code={python} lang="py" />
66+
</Details>
67+
68+
<Details header="curl">
69+
<Code code={curl} lang="sh" />
70+
</Details>
71+
72+
<Aside type="note" title="OpenAI compatible endpoints">
73+
Workers AI also supports OpenAI compatible API endpoints for{" "}
74+
<code>/v1/chat/completions</code>, <code>/v1/responses</code>, and <code
75+
>/v1/embeddings</code
76+
>. For more details, refer to{" "}
77+
<a href="/workers-ai/configuration/open-ai-compatibility/">
78+
Configurations
79+
</a>
80+
.
81+
</Aside>
82+
</>
83+
)
84+

src/content/docs/workers-ai/configuration/open-ai-compatibility.mdx

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,11 @@ pcx_content_type: configuration
33
title: OpenAI compatible API endpoints
44
sidebar:
55
order: 3
6-
76
---
87

9-
import { Render } from "~/components"
8+
import { Render } from "~/components";
109

11-
<Render file="openai-compatibility" /> <br/>
10+
<Render file="openai-compatibility" /> <br />
1211

1312
## Usage
1413

@@ -22,20 +21,26 @@ With OpenAI compatible endpoints, you can leverage the [openai-node sdk](https://
2221
import OpenAI from "openai";
2322

2423
const openai = new OpenAI({
25-
apiKey: env.CLOUDFLARE_API_KEY,
26-
baseURL: `https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`
27-
});
24+
apiKey: env.CLOUDFLARE_API_KEY,
25+
baseURL: `https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`,
26+
});
2827

28+
// Use chat completions
2929
const chatCompletion = await openai.chat.completions.create({
30-
messages: [{ role: "user", content: "Make some robot noises" }],
31-
model: "@cf/meta/llama-3.1-8b-instruct",
32-
});
30+
messages: [{ role: "user", content: "Make some robot noises" }],
31+
model: "@cf/meta/llama-3.1-8b-instruct",
32+
});
3333

34-
const embeddings = await openai.embeddings.create({
35-
model: "@cf/baai/bge-large-en-v1.5",
36-
input: "I love matcha"
37-
});
34+
// Use responses
35+
const response = await openai.responses.create({
36+
model: "@cf/openai/gpt-oss-120b",
37+
input: "Talk to me about open source",
38+
});
3839

40+
const embeddings = await openai.embeddings.create({
41+
model: "@cf/baai/bge-large-en-v1.5",
42+
input: "I love matcha",
43+
});
3944
```
4045

4146
```bash title="cURL example"

src/content/docs/workers-ai/platform/pricing.mdx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ The Price in Tokens column is equivalent to the Price in Neurons column - the di
5454
| @cf/google/gemma-3-12b-it | $0.345 per M input tokens <br/> $0.556 per M output tokens | 31371 neurons per M input tokens <br/> 50560 neurons per M output tokens |
5555
| @cf/qwen/qwq-32b | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
5656
| @cf/qwen/qwen2.5-coder-32b-instruct | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
57+
| @cf/openai/gpt-oss-120b | $0.350 per M input tokens <br/> $0.750 per M output tokens | 31818 neurons per M input tokens <br/> 68182 neurons per M output tokens |
58+
| @cf/openai/gpt-oss-20b | $0.200 per M input tokens <br/> $0.300 per M output tokens | 18182 neurons per M input tokens <br/> 27273 neurons per M output tokens |
5759

5860
## Embeddings model pricing
5961

src/pages/workers-ai/models/[name].astro

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import LlamaGuard from "~/components/models/code/LlamaGuard.astro";
2828
import BgeRerankerBase from "~/components/models/code/Bge-Reranker-Base.astro";
2929
3030
import { authorData } from "~/components/models/data";
31+
import OpenAIResponsesTextGenerationCode from "~/components/models/code/OpenAIResponsesTextGenerationCode.astro";
3132
3233
export const getStaticPaths = (async () => {
3334
const models = await getCollection("workers-ai-models");
@@ -108,6 +109,13 @@ if (model.name === "@cf/baai/bge-reranker-base") {
108109
CodeExamples = BgeRerankerBase;
109110
}
110111
112+
if (
113+
model.name === "@cf/openai/gpt-oss-120b" ||
114+
model.name === "@cf/openai/gpt-oss-20b"
115+
) {
116+
CodeExamples = OpenAIResponsesTextGenerationCode;
117+
}
118+
111119
const description = model.description;
112120
113121
const isBeta = model.properties.find(
@@ -148,26 +156,26 @@ const starlightPageProps = {
148156
alt={`${author.name} logo`}
149157
/>
150158
) : (
151-
<div class="mr-4 flex h-12 w-12 items-center justify-center rounded-md bg-gray-100 text-2xl font-black uppercase text-gray-400">
159+
<div class="mr-4 flex h-12 w-12 items-center justify-center rounded-md bg-gray-100 text-2xl font-black text-gray-400 uppercase">
152160
{model.name.split("/")[1].substring(0, 1)}
153161
</div>
154162
)
155163
}
156164
<div>
157165
<h1
158166
id="_top"
159-
class="-mt-4! mb-0! flex items-center text-4xl! font-bold! leading-none!"
167+
class="-mt-4! mb-0! flex items-center text-4xl! leading-none! font-bold!"
160168
>
161169
{name}
162-
{isBeta && <Badge text="Beta" variant="caution" class="ml-3 mt-2" />}
170+
{isBeta && <Badge text="Beta" variant="caution" class="mt-2 ml-3" />}
163171
</h1>
164172
<span class="-mt-1 block"><ModelInfo model={model} /></span>
165173
</div>
166174
</div>
167175

168176
<span class="mt-4 block font-mono text-sm text-gray-400">{model.name}</span>
169177

170-
<p class="mb-2! mt-3">{description}</p>
178+
<p class="mt-3 mb-2!">{description}</p>
171179

172180
{
173181
model.name === "@cf/meta/llama-3.2-11b-vision-instruct" && (
@@ -256,4 +264,4 @@ const starlightPageProps = {
256264
<Code code={JSON.stringify(model.schema.output, null, 4)} lang="json" />
257265
</TabItem>
258266
</Tabs>
259-
</StarlightPage>
267+
</StarlightPage>

0 commit comments

Comments
 (0)