Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/components/ModelCatalog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const ModelCatalog = ({ models }: { models: WorkersAIModelsSchema[] }) => {

// List of model names to pin at the top
const pinnedModelNames = [
"@cf/openai/gpt-oss-120b",
"@cf/openai/gpt-oss-20b",
"@cf/meta/llama-4-scout-17b-16e-instruct",
"@cf/meta/llama-3.3-70b-instruct-fp8-fast",
"@cf/meta/llama-3.1-8b-instruct-fast",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
---
// Code samples (Worker / Python / curl) for models served through the
// OpenAI-compatible /v1/responses endpoint; `name` is interpolated into
// each snippet.
import { z } from "astro:schema";
import { Aside, Code } from "@astrojs/starlight/components";
import Details from "~/components/Details.astro";

type Props = z.infer<typeof props>;

const props = z.object({
  name: z.string(),
  // Optional: nothing below varies by LoRA support, and requiring it would
  // make parse() throw for callers that omit it.
  lora: z.boolean().optional(),
});

const { name } = props.parse(Astro.props);

const worker = `
export default {
  async fetch(request, env): Promise<Response> {
    const response = await env.AI.run('${name}', {
      instructions: 'You are a concise assistant.',
      input: 'What is the origin of the phrase Hello, World?',
    });

    return Response.json(response);
  },
} satisfies ExportedHandler<Env>;
`;

const python = `
import os
import requests

ACCOUNT_ID = os.environ.get("CLOUDFLARE_ACCOUNT_ID")
AUTH_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")

prompt = "Tell me all about PEP-8"
response = requests.post(
    f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/v1/responses",
    headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
    json={
        "model": "${name}",
        "input": prompt
    }
)
result = response.json()
print(result)
`;

// NOTE: the line-continuation backslashes must be written as \\ here — a
// single \ before a newline inside a template literal is a JS
// LineContinuation and would be erased from the rendered snippet.
const curl = `
curl https://api.cloudflare.com/client/v4/accounts/$CLOUDFLARE_ACCOUNT_ID/ai/v1/responses \\
  -H "Content-Type: application/json" \\
  -H "Authorization: Bearer $CLOUDFLARE_AUTH_TOKEN" \\
  -d '{
    "model": "${name}",
    "input": "What are the benefits of open-source models?"
  }'
`;
---

<>
  <Details header="Worker">
    <Code code={worker} lang="ts" />
  </Details>

  <Details header="Python">
    <Code code={python} lang="py" />
  </Details>

  <Details header="curl">
    <Code code={curl} lang="sh" />
  </Details>

  <Aside type="note" title="OpenAI compatible endpoints">
    Workers AI also supports OpenAI compatible API endpoints for{" "}
    <code>/v1/chat/completions</code>, <code>/v1/responses</code>, and <code
    >/v1/embeddings</code
    >. For more details, refer to{" "}
    <a href="/workers-ai/configuration/open-ai-compatibility/">
      Configurations
    </a>
    .
  </Aside>
</>

2 changes: 2 additions & 0 deletions src/content/docs/workers-ai/platform/pricing.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ The Price in Tokens column is equivalent to the Price in Neurons column - the di
| @cf/google/gemma-3-12b-it | $0.345 per M input tokens <br/> $0.556 per M output tokens | 31371 neurons per M input tokens <br/> 50560 neurons per M output tokens |
| @cf/qwen/qwq-32b | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
| @cf/qwen/qwen2.5-coder-32b-instruct | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
| @cf/openai/gpt-oss-120b | $0.350 per M input tokens <br/> $0.750 per M output tokens | 31818 neurons per M input tokens <br/> 68182 neurons per M output tokens |
| @cf/openai/gpt-oss-20b | $0.200 per M input tokens <br/> $0.300 per M output tokens | 18182 neurons per M input tokens <br/> 27273 neurons per M output tokens |

## Embeddings model pricing

Expand Down
18 changes: 13 additions & 5 deletions src/pages/workers-ai/models/[name].astro
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import LlamaGuard from "~/components/models/code/LlamaGuard.astro";
import BgeRerankerBase from "~/components/models/code/Bge-Reranker-Base.astro";

import { authorData } from "~/components/models/data";
import OpenAIResponsesTextGenerationCode from "~/components/models/code/OpenAIResponsesTextGenerationCode.astro";

export const getStaticPaths = (async () => {
const models = await getCollection("workers-ai-models");
Expand Down Expand Up @@ -108,6 +109,13 @@ if (model.name === "@cf/baai/bge-reranker-base") {
CodeExamples = BgeRerankerBase;
}

if (
model.name === "@cf/openai/gpt-oss-120b" ||
model.name === "@cf/openai/gpt-oss-20b"
) {
CodeExamples = OpenAIResponsesTextGenerationCode;
}

const description = model.description;

const isBeta = model.properties.find(
Expand Down Expand Up @@ -148,26 +156,26 @@ const starlightPageProps = {
alt={`${author.name} logo`}
/>
) : (
<div class="mr-4 flex h-12 w-12 items-center justify-center rounded-md bg-gray-100 text-2xl font-black uppercase text-gray-400">
<div class="mr-4 flex h-12 w-12 items-center justify-center rounded-md bg-gray-100 text-2xl font-black text-gray-400 uppercase">
{model.name.split("/")[1].substring(0, 1)}
</div>
)
}
<div>
<h1
id="_top"
class="-mt-4! mb-0! flex items-center text-4xl! font-bold! leading-none!"
class="-mt-4! mb-0! flex items-center text-4xl! leading-none! font-bold!"
>
{name}
{isBeta && <Badge text="Beta" variant="caution" class="ml-3 mt-2" />}
{isBeta && <Badge text="Beta" variant="caution" class="mt-2 ml-3" />}
</h1>
<span class="-mt-1 block"><ModelInfo model={model} /></span>
</div>
</div>

<span class="mt-4 block font-mono text-sm text-gray-400">{model.name}</span>

<p class="mb-2! mt-3">{description}</p>
<p class="mt-3 mb-2!">{description}</p>

{
model.name === "@cf/meta/llama-3.2-11b-vision-instruct" && (
Expand Down Expand Up @@ -256,4 +264,4 @@ const starlightPageProps = {
<Code code={JSON.stringify(model.schema.output, null, 4)} lang="json" />
</TabItem>
</Tabs>
</StarlightPage>
</StarlightPage>
Loading