diff --git a/src/components/ModelCatalog.tsx b/src/components/ModelCatalog.tsx
index 5a7dc014b24fa8f..9a02787452b042e 100644
--- a/src/components/ModelCatalog.tsx
+++ b/src/components/ModelCatalog.tsx
@@ -22,6 +22,8 @@ const ModelCatalog = ({ models }: { models: WorkersAIModelsSchema[] }) => {
 	// List of model names to pin at the top
 	const pinnedModelNames = [
+		"@cf/openai/gpt-oss-120b",
+		"@cf/openai/gpt-oss-20b",
 		"@cf/meta/llama-4-scout-17b-16e-instruct",
 		"@cf/meta/llama-3.3-70b-instruct-fp8-fast",
 		"@cf/meta/llama-3.1-8b-instruct-fast",
diff --git a/src/components/models/code/OpenAIResponsesTextGenerationCode.astro b/src/components/models/code/OpenAIResponsesTextGenerationCode.astro
new file mode 100644
index 000000000000000..a12e70ec6481445
--- /dev/null
+++ b/src/components/models/code/OpenAIResponsesTextGenerationCode.astro
@@ -0,0 +1,84 @@
+---
+import { z } from "astro:schema";
+import { Aside, Code } from "@astrojs/starlight/components";
+import Details from "~/components/Details.astro";
+
+type Props = z.infer<typeof props>;
+
+const props = z.object({
+	name: z.string(),
+	lora: z.boolean(),
+});
+
+const { name } = props.parse(Astro.props);
+
+const worker = `
+export default {
+	async fetch(request, env): Promise<Response> {
+		const response = await env.AI.run('${name}', {
+			instructions: 'You are a concise assistant.',
+			input: 'What is the origin of the phrase Hello, World?',
+		});
+
+		return Response.json(response);
+	},
+} satisfies ExportedHandler<Env>;
+`;
+
+const python = `
+import os
+import requests
+
+ACCOUNT_ID = os.environ.get("CLOUDFLARE_ACCOUNT_ID")
+AUTH_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")
+
+prompt = "Tell me all about PEP-8"
+response = requests.post(
+	f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/v1/responses",
+	headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
+	json={
+		"model": "${name}",
+		"input": prompt
+	}
+)
+result = response.json()
+print(result)
+`;
+
+const curl = `
+curl https://api.cloudflare.com/client/v4/accounts/$CLOUDFLARE_ACCOUNT_ID/ai/v1/responses \\
+	-H "Content-Type: application/json" \\
+	-H "Authorization: Bearer $CLOUDFLARE_AUTH_TOKEN" \\
+	-d '{
+		"model": "${name}",
+		"input": "What are the benefits of open-source models?"
+	}'
+`;
+---
+
+<>
+	<Details header="Worker - TypeScript" open={true}>
+		<Code code={worker} lang="ts" />
+	</Details>
+
+	<Details header="Python" open={false}>
+		<Code code={python} lang="py" />
+	</Details>
+
+	<Details header="curl" open={false}>
+		<Code code={curl} lang="sh" />
+	</Details>
+
+	<Aside type="note">
+		This model is served through the OpenAI compatible
+		<a href="/workers-ai/configuration/open-ai-compatibility/">Responses endpoint</a>.
+	</Aside>
+</>
diff --git a/src/content/docs/workers-ai/configuration/open-ai-compatibility.mdx b/src/content/docs/workers-ai/configuration/open-ai-compatibility.mdx
index 2a59d09b0bca393..8a29ebaf9341443 100644
--- a/src/content/docs/workers-ai/configuration/open-ai-compatibility.mdx
+++ b/src/content/docs/workers-ai/configuration/open-ai-compatibility.mdx
@@ -3,12 +3,11 @@ pcx_content_type: configuration
 title: OpenAI compatible API endpoints
 sidebar:
   order: 3
-
 ---

-import { Render } from "~/components"
+import { Render } from "~/components";

-<Render file="openai-compatibility" />
+<Render file="openai-compatibility" />

 ## Usage

@@ -22,20 +21,26 @@ With OpenAI compatible endpoints, you can leverage the [openai-node sdk](https:
 ```js
 import OpenAI from "openai";

 const openai = new OpenAI({
-  apiKey: env.CLOUDFLARE_API_KEY,
-  baseURL: `https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`
-  });
+	apiKey: env.CLOUDFLARE_API_KEY,
+	baseURL: `https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`,
+});

+// Use chat completions
 const chatCompletion = await openai.chat.completions.create({
-  messages: [{ role: "user", content: "Make some robot noises" }],
-  model: "@cf/meta/llama-3.1-8b-instruct",
-  });
+	messages: [{ role: "user", content: "Make some robot noises" }],
+	model: "@cf/meta/llama-3.1-8b-instruct",
+});

-const embeddings = await openai.embeddings.create({
-  model: "@cf/baai/bge-large-en-v1.5",
-  input: "I love matcha"
-  });
+// Use responses
+const response = await openai.responses.create({
+	model: "@cf/openai/gpt-oss-120b",
+	input: "Talk to me about open source",
+});

+const embeddings = await openai.embeddings.create({
+	model: "@cf/baai/bge-large-en-v1.5",
+	input: "I love matcha",
+});
 ```

 ```bash title="cURL example"
diff --git a/src/content/docs/workers-ai/platform/pricing.mdx b/src/content/docs/workers-ai/platform/pricing.mdx
index 9c2101abcb3b8dc..513728353f95ded 100644
--- a/src/content/docs/workers-ai/platform/pricing.mdx
+++ b/src/content/docs/workers-ai/platform/pricing.mdx
@@ -54,6 +54,8 @@ The Price in Tokens column is equivalent to the Price in Neurons column - the di
 | @cf/google/gemma-3-12b-it | $0.345 per M input tokens <br/> $0.556 per M output tokens | 31371 neurons per M input tokens <br/> 50560 neurons per M output tokens |
 | @cf/qwen/qwq-32b | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
 | @cf/qwen/qwen2.5-coder-32b-instruct | $0.660 per M input tokens <br/> $1.000 per M output tokens | 60000 neurons per M input tokens <br/> 90909 neurons per M output tokens |
+| @cf/openai/gpt-oss-120b | $0.350 per M input tokens <br/> $0.750 per M output tokens | 31818 neurons per M input tokens <br/> 68182 neurons per M output tokens |
+| @cf/openai/gpt-oss-20b | $0.200 per M input tokens <br/> $0.300 per M output tokens | 18182 neurons per M input tokens <br/> 27273 neurons per M output tokens |

 ## Embeddings model pricing

diff --git a/src/pages/workers-ai/models/[name].astro b/src/pages/workers-ai/models/[name].astro
index f37151f8e672bd9..c65ccb844dd7bfd 100644
--- a/src/pages/workers-ai/models/[name].astro
+++ b/src/pages/workers-ai/models/[name].astro
@@ -28,6 +28,7 @@ import LlamaGuard from "~/components/models/code/LlamaGuard.astro";
 import BgeRerankerBase from "~/components/models/code/Bge-Reranker-Base.astro";

 import { authorData } from "~/components/models/data";
+import OpenAIResponsesTextGenerationCode from "~/components/models/code/OpenAIResponsesTextGenerationCode.astro";

 export const getStaticPaths = (async () => {
 	const models = await getCollection("workers-ai-models");
@@ -108,6 +109,13 @@ if (model.name === "@cf/baai/bge-reranker-base") {
 	CodeExamples = BgeRerankerBase;
 }

+if (
+	model.name === "@cf/openai/gpt-oss-120b" ||
+	model.name === "@cf/openai/gpt-oss-20b"
+) {
+	CodeExamples = OpenAIResponsesTextGenerationCode;
+}
+
 const description = model.description;

 const isBeta = model.properties.find(
@@ -148,7 +156,7 @@ const starlightPageProps = {
 					alt={`${author.name} logo`}
 				/>
 			) : (
-        <div class="…">
+				<div class="…">
 					{model.name.split("/")[1].substring(0, 1)}
 				</div>
 			)
@@ -156,10 +164,10 @@ const starlightPageProps = {
 				</div>
 			)
 		}
 		<div>
 			<h1 class="…">
 				{name}
-        {isBeta && <Badge text="Beta" variant="caution" />}
+				{isBeta && <Badge text="Beta" variant="caution" />}
 			</h1>
 		</div>
 	</div>
@@ -167,7 +175,7 @@ const starlightPageProps = {
 				<span class="…">{model.name}</span>
 			</div>
 		</div>

-    <p class="…">{description}</p>
+		<p class="…">{description}</p>
 		{
 			model.name === "@cf/meta/llama-3.2-11b-vision-instruct" && (
@@ -256,4 +264,4 @@ const starlightPageProps = {
 			</div>
 		</div>
 	</div>
-</StarlightPage>
\ No newline at end of file
+</StarlightPage>
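As a sanity check on the new pricing rows: at Workers AI's published rate of $0.011 per 1,000 neurons, gpt-oss-120b's 31818 neurons per M input tokens and 68182 neurons per M output tokens work out to roughly $0.35 and $0.75, and gpt-oss-20b's 18182 and 27273 neurons to roughly $0.20 and $0.30, so the token and neuron columns added above agree with each other.

Below is a minimal sketch, not part of the patch, of how the Responses route this change documents can be smoke-tested end to end with the openai-node SDK. It mirrors the examples in the diff; the environment variable names are assumptions.

```ts
// Sketch: exercise the /v1/responses route documented above (not part of the PR).
// Assumes CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_API_KEY are set in the environment.
import OpenAI from "openai";

const client = new OpenAI({
	apiKey: process.env.CLOUDFLARE_API_KEY,
	baseURL: `https://api.cloudflare.com/client/v4/accounts/${process.env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`,
});

const response = await client.responses.create({
	model: "@cf/openai/gpt-oss-120b",
	instructions: "You are a concise assistant.",
	input: "In two sentences, why do open-weight models matter?",
});

// output_text is the SDK's convenience accessor that concatenates
// all text parts of the response output.
console.log(response.output_text);
```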