Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/components/ModelCatalog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const ModelCatalog = ({ models }: { models: WorkersAIModelsSchema[] }) => {

// List of model names to pin at the top
const pinnedModelNames = [
"@cf/openai/gpt-oss-120b",
"@cf/openai/gpt-oss-20b",
"@cf/meta/llama-4-scout-17b-16e-instruct",
"@cf/meta/llama-3.3-70b-instruct-fp8-fast",
"@cf/meta/llama-3.1-8b-instruct-fast",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
---
// Renders copy-paste code examples (Worker, Python, curl) for text-generation
// models served through the OpenAI-compatible Responses endpoint
// (`/ai/v1/responses`), e.g. @cf/openai/gpt-oss-120b and gpt-oss-20b.
import { z } from "astro:schema";
import { Aside, Code } from "@astrojs/starlight/components";
import Details from "~/components/Details.astro";

type Props = z.infer<typeof props>;

// `name` is the full model identifier interpolated into every example;
// `lora` is accepted for parity with the other code-example components.
const props = z.object({
	name: z.string(),
	lora: z.boolean(),
});

const { name } = props.parse(Astro.props);

// Workers binding example — the Responses-style API takes `instructions`
// (system prompt) and `input` (user prompt).
const worker = `
export default {
  async fetch(request, env): Promise<Response> {
    const response = await env.AI.run('${name}', {
      instructions: 'You are a concise assistant.',
      input: 'What is the origin of the phrase Hello, World?',
    });

    return Response.json(response);
  },
} satisfies ExportedHandler<Env>;
`;

// REST API example via Python. Note: the {ACCOUNT_ID}/{AUTH_TOKEN} braces
// belong to the Python f-string, not to this template literal.
const python = `
import os
import requests

ACCOUNT_ID = os.environ.get("CLOUDFLARE_ACCOUNT_ID")
AUTH_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")

prompt = "Tell me all about PEP-8"
response = requests.post(
    f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/v1/responses",
    headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
    json={
        "model": "${name}",
        "input": prompt
    }
)
result = response.json()
print(result)
`;

// REST API example via curl, using shell environment variables for credentials.
const curl = `
curl https://api.cloudflare.com/client/v4/accounts/$CLOUDFLARE_ACCOUNT_ID/ai/v1/responses \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $CLOUDFLARE_AUTH_TOKEN" \
  -d '{
    "model": "${name}",
    "input": "What are the benefits of open-source models?"
  }'
`;
---

<>
	<Details header="Worker">
		<Code code={worker} lang="ts" />
	</Details>

	<Details header="Python">
		<Code code={python} lang="py" />
	</Details>

	<Details header="curl">
		<Code code={curl} lang="sh" />
	</Details>

	<Aside type="note" title="OpenAI compatible endpoints">
		Workers AI also supports OpenAI compatible API endpoints for{" "}
		<code>/v1/chat/completions</code>, <code>/v1/responses</code>, and <code
			>/v1/embeddings</code
		>. For more details, refer to{" "}
		<a href="/workers-ai/configuration/open-ai-compatibility/">
			Configurations
		</a>
		.
	</Aside>
</>

Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ pcx_content_type: configuration
title: OpenAI compatible API endpoints
sidebar:
order: 3

---

import { Render } from "~/components"
import { Render } from "~/components";

<Render file="openai-compatibility" /> <br/>
<Render file="openai-compatibility" /> <br />

## Usage

Expand All @@ -22,20 +21,26 @@ With OpenAI compatible endpoints, you can leverage the [openai-node sdk](https://
import OpenAI from "openai";

const openai = new OpenAI({
apiKey: env.CLOUDFLARE_API_KEY,
baseURL: `https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`
});
apiKey: env.CLOUDFLARE_API_KEY,
baseURL: `https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`,
});

// Use chat completions
const chatCompletion = await openai.chat.completions.create({
messages: [{ role: "user", content: "Make some robot noises" }],
model: "@cf/meta/llama-3.1-8b-instruct",
});
messages: [{ role: "user", content: "Make some robot noises" }],
model: "@cf/meta/llama-3.1-8b-instruct",
});

const embeddings = await openai.embeddings.create({
model: "@cf/baai/bge-large-en-v1.5",
input: "I love matcha"
});
// Use responses
const response = await openai.responses.create({
model: "@cf/openai/gpt-oss-120b",
input: "Talk to me about open source",
});

const embeddings = await openai.embeddings.create({
model: "@cf/baai/bge-large-en-v1.5",
input: "I love matcha",
});
```

```bash title="cURL example"
Expand Down
2 changes: 2 additions & 0 deletions src/content/docs/workers-ai/platform/pricing.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ The Price in Tokens column is equivalent to the Price in Neurons column - the di
| @cf/google/gemma-3-12b-it | $0.345 per M input tokens <br/> $0.556 per M output tokens | 31371 neurons per M input tokens <br/> 50560 neurons per M output tokens |
| @cf/qwen/qwq-32b | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
| @cf/qwen/qwen2.5-coder-32b-instruct | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
| @cf/openai/gpt-oss-120b | $0.350 per M input tokens <br/> $0.750 per M output tokens | 31818 neurons per M input tokens <br/> 68182 neurons per M output tokens |
| @cf/openai/gpt-oss-20b | $0.200 per M input tokens <br/> $0.300 per M output tokens | 18182 neurons per M input tokens <br/> 27273 neurons per M output tokens |

## Embeddings model pricing

Expand Down
18 changes: 13 additions & 5 deletions src/pages/workers-ai/models/[name].astro
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import LlamaGuard from "~/components/models/code/LlamaGuard.astro";
import BgeRerankerBase from "~/components/models/code/Bge-Reranker-Base.astro";

import { authorData } from "~/components/models/data";
import OpenAIResponsesTextGenerationCode from "~/components/models/code/OpenAIResponsesTextGenerationCode.astro";

export const getStaticPaths = (async () => {
const models = await getCollection("workers-ai-models");
Expand Down Expand Up @@ -108,6 +109,13 @@ if (model.name === "@cf/baai/bge-reranker-base") {
CodeExamples = BgeRerankerBase;
}

if (
model.name === "@cf/openai/gpt-oss-120b" ||
model.name === "@cf/openai/gpt-oss-20b"
) {
CodeExamples = OpenAIResponsesTextGenerationCode;
}

const description = model.description;

const isBeta = model.properties.find(
Expand Down Expand Up @@ -148,26 +156,26 @@ const starlightPageProps = {
alt={`${author.name} logo`}
/>
) : (
<div class="mr-4 flex h-12 w-12 items-center justify-center rounded-md bg-gray-100 text-2xl font-black uppercase text-gray-400">
<div class="mr-4 flex h-12 w-12 items-center justify-center rounded-md bg-gray-100 text-2xl font-black text-gray-400 uppercase">
{model.name.split("/")[1].substring(0, 1)}
</div>
)
}
<div>
<h1
id="_top"
class="-mt-4! mb-0! flex items-center text-4xl! font-bold! leading-none!"
class="-mt-4! mb-0! flex items-center text-4xl! leading-none! font-bold!"
>
{name}
{isBeta && <Badge text="Beta" variant="caution" class="ml-3 mt-2" />}
{isBeta && <Badge text="Beta" variant="caution" class="mt-2 ml-3" />}
</h1>
<span class="-mt-1 block"><ModelInfo model={model} /></span>
</div>
</div>

<span class="mt-4 block font-mono text-sm text-gray-400">{model.name}</span>

<p class="mb-2! mt-3">{description}</p>
<p class="mt-3 mb-2!">{description}</p>

{
model.name === "@cf/meta/llama-3.2-11b-vision-instruct" && (
Expand Down Expand Up @@ -256,4 +264,4 @@ const starlightPageProps = {
<Code code={JSON.stringify(model.schema.output, null, 4)} lang="json" />
</TabItem>
</Tabs>
</StarlightPage>
</StarlightPage>
Loading