diff --git a/.gitignore b/.gitignore index 501cf6fc5247c53..5b9650b9c9b739b 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,6 @@ pnpm-debug.log* /assets/secrets /worker/functions/ -.idea \ No newline at end of file +.idea + +public/* \ No newline at end of file diff --git a/src/content/changelog/workers-ai/2025-08-05-openai-open-models.mdx b/src/content/changelog/workers-ai/2025-08-05-openai-open-models.mdx new file mode 100644 index 000000000000000..1dbfdc154f12e70 --- /dev/null +++ b/src/content/changelog/workers-ai/2025-08-05-openai-open-models.mdx @@ -0,0 +1,45 @@ +--- +title: OpenAI open models now available on Workers AI +description: Partnering with OpenAI as a Day 0 launch partner to bring you 2 new open models on Workers AI +products: + - agents + - workers-ai +date: 2025-08-05 +--- + +We're thrilled to be a Day 0 partner with [OpenAI](https://openai.com/index/introducing-gpt-oss) to bring their [latest open models](https://openai.com/index/gpt-oss-model-card/) to Workers AI, including support for the Responses API, Code Interpreter, and Web Search (coming soon). + +Get started with the new models at `@cf/openai/gpt-oss-120b` and `@cf/openai/gpt-oss-20b`. +Check out the [blog](https://blog.cloudflare.com/openai-gpt-oss-on-workers-ai) for more details about the new models, and the [`gpt-oss-120b`](/workers-ai/models/gpt-oss-120b) and [`gpt-oss-20b`](/workers-ai/models/gpt-oss-20b) model pages for more information about pricing and context windows. 
+ +## Responses API +If you call the model through: +- Workers Binding, it will accept/return Responses API – `env.AI.run("@cf/openai/gpt-oss-120b")` +- REST API on the `/run` endpoint, it will accept/return Responses API – `https://api.cloudflare.com/client/v4/accounts/<account_id>/ai/run/@cf/openai/gpt-oss-120b` +- REST API on the new `/responses` endpoint, it will accept/return Responses API – `https://api.cloudflare.com/client/v4/accounts/<account_id>/ai/v1/responses` +- REST API for the OpenAI Compatible endpoint, it will return Chat Completions (coming soon) – `https://api.cloudflare.com/client/v4/accounts/<account_id>/ai/v1/chat/completions` + + +```bash +curl https://api.cloudflare.com/client/v4/accounts/<account_id>/ai/v1/responses \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $CLOUDFLARE_API_KEY" \ + -d '{ + "model": "@cf/openai/gpt-oss-120b", + "reasoning": {"effort": "medium"}, + "input": [ + { + "role": "user", + "content": "What are the benefits of open-source models?" + } + ] + }' + + +``` + +## Code Interpreter +The model is natively trained to support stateful code execution, and we've implemented support for this feature using our [Sandbox SDK](https://github.com/cloudflare/sandbox-sdk) and [Containers](https://blog.cloudflare.com/containers-are-available-in-public-beta-for-simple-global-and-programmable/). Cloudflare's Developer Platform is uniquely positioned to support this feature, so we're very excited to bring our products together to support this new use case. + +## Web Search (coming soon) +We are working to implement Web Search for the model, where users can bring their own Exa API Key so the model can browse the Internet. 
diff --git a/src/content/workers-ai-models/gpt-oss-120b.json b/src/content/workers-ai-models/gpt-oss-120b.json new file mode 100644 index 000000000000000..f093238828177ea --- /dev/null +++ b/src/content/workers-ai-models/gpt-oss-120b.json @@ -0,0 +1,99 @@ +{ + "id": "f9f2250b-1048-4a52-9910-d0bf976616a1", + "source": 1, + "name": "@cf/openai/gpt-oss-120b", + "description": "OpenAI’s open-weight models designed for powerful reasoning, agentic tasks, and versatile developer use cases – gpt-oss-120b is for production, general purpose, high reasoning use-cases.", + "task": { + "id": "c329a1f9-323d-4e91-b2aa-582dd4188d34", + "name": "Text Generation", + "description": "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks." + }, + "created_at": "2025-08-05 10:27:29.131", + "tags": [], + "properties": [ + { + "property_id": "context_window", + "value": "128000" + }, + { + "property_id": "price", + "value": [ + { + "unit": "per M input tokens", + "price": 0.35, + "currency": "USD" + }, + { + "unit": "per M output tokens", + "price": 0.75, + "currency": "USD" + } + ] + } + ], + "schema": { + "input": { + "type": "object", + "title": "GPT_OSS_Responses", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the message input. One of user, assistant, system, or developer.", + "enum": [ + "user", + "assistant", + "system", + "developer" + ] + }, + "content": { + "oneOf": [ + { + "type": "string", + "description": "The content of the message as a string." 
+ }, + { + "type": "array", + "description": "Refer to OpenAI Responses API docs to learn more about supported content types.", + "items": { + "type": "object", + "properties": {} + } + } + ] + } + } + } + } + ] + } + }, + "required": [ + "input" + ] + }, + "output": { + "oneOf": [ + { + "type": "object", + "contentType": "application/json" + }, + { + "type": "string", + "contentType": "text/event-stream", + "format": "binary" + } + ] + } + } +} \ No newline at end of file diff --git a/src/content/workers-ai-models/gpt-oss-20b.json b/src/content/workers-ai-models/gpt-oss-20b.json new file mode 100644 index 000000000000000..d3d6cb0cb2a425f --- /dev/null +++ b/src/content/workers-ai-models/gpt-oss-20b.json @@ -0,0 +1,99 @@ +{ + "id": "188a4e1e-253e-46d0-9616-0bf8c149763f", + "source": 1, + "name": "@cf/openai/gpt-oss-20b", + "description": "OpenAI’s open-weight models designed for powerful reasoning, agentic tasks, and versatile developer use cases – gpt-oss-20b is for lower latency, and local or specialized use-cases.", + "task": { + "id": "c329a1f9-323d-4e91-b2aa-582dd4188d34", + "name": "Text Generation", + "description": "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks." + }, + "created_at": "2025-08-05 10:49:53.265", + "tags": [], + "properties": [ + { + "property_id": "context_window", + "value": "128000" + }, + { + "property_id": "price", + "value": [ + { + "unit": "per M input tokens", + "price": 0.2, + "currency": "USD" + }, + { + "unit": "per M output tokens", + "price": 0.3, + "currency": "USD" + } + ] + } + ], + "schema": { + "input": { + "type": "object", + "title": "GPT_OSS_Responses", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the message input. 
One of user, assistant, system, or developer.", + "enum": [ + "user", + "assistant", + "system", + "developer" + ] + }, + "content": { + "oneOf": [ + { + "type": "string", + "description": "The content of the message as a string." + }, + { + "type": "array", + "description": "Refer to OpenAI Responses API docs to learn more about supported content types.", + "items": { + "type": "object", + "properties": {} + } + } + ] + } + } + } + } + ] + } + }, + "required": [ + "input" + ] + }, + "output": { + "oneOf": [ + { + "type": "object", + "contentType": "application/json" + }, + { + "type": "string", + "contentType": "text/event-stream", + "format": "binary" + } + ] + } + } +} \ No newline at end of file