diff --git a/src/content/docs/ai-gateway/chat-completion.mdx b/src/content/docs/ai-gateway/chat-completion.mdx new file mode 100644 index 000000000000000..1e9972a711989a8 --- /dev/null +++ b/src/content/docs/ai-gateway/chat-completion.mdx @@ -0,0 +1,110 @@ +--- +title: OpenAI Compatibility +pcx_content_type: reference +sidebar: + order: 4 +--- + +Cloudflare's AI Gateway offers an OpenAI-compatible `/chat/completions` endpoint, enabling integration with multiple AI providers using a single URL. This feature simplifies the integration process, allowing for seamless switching between different models without significant code modifications. + +## Endpoint URL + +```txt +https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}/compat/chat/completions +``` + +Replace `{account_id}` and `{gateway_id}` with your Cloudflare account and gateway IDs. + +## Parameters + +Switch providers by changing the `model` and `apiKey` parameters. + +Specify the model using `{provider}/{model}` format. For example: + +- `openai/gpt-4o-mini` +- `google-ai-studio/gemini-2.0-flash` +- `anthropic/claude-3-haiku` + +## Examples + +### OpenAI SDK + +```js +import OpenAI from "openai"; +const client = new OpenAI({ + apiKey: "YOUR_PROVIDER_API_KEY", // Provider API key + baseURL: + "https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}/compat/chat/completions", +}); + +const response = await client.chat.completions.create({ + model: "google-ai-studio/gemini-2.0-flash", + messages: [{ role: "user", content: "What is Cloudflare?" }], +}); + +console.log(response.choices[0].message.content); +``` + +### cURL + +```bash +curl -X POST https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}/compat/chat/completions \ + --header 'Authorization: Bearer {openai_token}' \ + --header 'Content-Type: application/json' \ + --data '{ + "model": "google-ai-studio/gemini-2.0-flash", + "messages": [ + { + "role": "user", + "content": "What is Cloudflare?" 
+ } + ] + }' +``` + +### Universal provider + +You can also use this pattern with a [Universal Endpoint](/ai-gateway/universal/). + +```ts title="index.ts" +export interface Env { + AI: Ai; +} + +export default { + async fetch(request: Request, env: Env) { + return env.AI.gateway("default").run({ + provider: "compat", + endpoint: "chat/completions", + headers: { + authorization: "Bearer ", + }, + query: { + model: "google-ai-studio/gemini-2.0-flash", + messages: [ + { + role: "user", + content: "What is Cloudflare?", + }, + ], + }, + }); + }, +}; +``` + +## Supported Providers + +The OpenAI-compatible endpoint supports models from the following providers: + +- [Anthropic](/ai-gateway/providers/anthropic/) +- [OpenAI](/ai-gateway/providers/openai/) +- [Groq](/ai-gateway/providers/groq/) +- [Mistral](/ai-gateway/providers/mistral/) +- [Cohere](/ai-gateway/providers/cohere/) +- [Perplexity](/ai-gateway/providers/perplexity/) +- [Workers AI](/ai-gateway/providers/workersai/) +- [Google-AI-Studio](/ai-gateway/providers/google-ai-studio/) +- [Grok](/ai-gateway/providers/grok/) +- [DeepSeek](/ai-gateway/providers/deepseek/) +- [Cerebras](/ai-gateway/providers/cerebras/) diff --git a/src/content/docs/ai-gateway/configuration/index.mdx b/src/content/docs/ai-gateway/configuration/index.mdx index 0b4789dcc2174de..285bdbf506c0780 100644 --- a/src/content/docs/ai-gateway/configuration/index.mdx +++ b/src/content/docs/ai-gateway/configuration/index.mdx @@ -4,7 +4,7 @@ title: Configuration sidebar: group: hideIndex: true - order: 4 + order: 5 --- import { DirectoryListing } from "~/components"; diff --git a/src/content/docs/ai-gateway/providers/anthropic.mdx b/src/content/docs/ai-gateway/providers/anthropic.mdx index 77908d54a450b73..469deee6cfa7047 100644 --- a/src/content/docs/ai-gateway/providers/anthropic.mdx +++ b/src/content/docs/ai-gateway/providers/anthropic.mdx @@ -3,6 +3,8 @@ title: Anthropic pcx_content_type: get-started --- +import { Render } from "~/components"; + 
[Anthropic](https://www.anthropic.com/) helps build reliable, interpretable, and steerable AI systems. ## Endpoint @@ -65,3 +67,17 @@ const message = await anthropic.messages.create({ max_tokens: maxTokens, }); ``` + + diff --git a/src/content/docs/ai-gateway/providers/cerebras.mdx b/src/content/docs/ai-gateway/providers/cerebras.mdx index 6a6554cb34e8499..d7ab6508d05a5f5 100644 --- a/src/content/docs/ai-gateway/providers/cerebras.mdx +++ b/src/content/docs/ai-gateway/providers/cerebras.mdx @@ -6,6 +6,8 @@ sidebar: text: Beta --- +import { Render } from "~/components"; + [Cerebras](https://inference-docs.cerebras.ai/) offers developers a low-latency solution for AI model inference. ## Endpoint @@ -41,3 +43,17 @@ curl https://gateway.ai.cloudflare.com/v1/ACCOUNT_TAG/GATEWAY/cerebras/chat/comp ] }' ``` + + diff --git a/src/content/docs/ai-gateway/providers/cohere.mdx b/src/content/docs/ai-gateway/providers/cohere.mdx index a08203969be4f7c..5c8af84f912fe44 100644 --- a/src/content/docs/ai-gateway/providers/cohere.mdx +++ b/src/content/docs/ai-gateway/providers/cohere.mdx @@ -3,6 +3,8 @@ title: Cohere pcx_content_type: get-started --- +import { Render } from "~/components"; + [Cohere](https://cohere.com/) build AI models designed to solve real-world business challenges. ## Endpoint @@ -72,3 +74,17 @@ chat = co.chat( print(chat) ``` + + diff --git a/src/content/docs/ai-gateway/providers/deepseek.mdx b/src/content/docs/ai-gateway/providers/deepseek.mdx index a7991797f2e628d..a0c69249d1c63fe 100644 --- a/src/content/docs/ai-gateway/providers/deepseek.mdx +++ b/src/content/docs/ai-gateway/providers/deepseek.mdx @@ -6,6 +6,8 @@ sidebar: text: Beta --- +import { Render } from "~/components"; + [DeepSeek](https://www.deepseek.com/) helps you build quickly with DeepSeek's advanced AI models. 
## Endpoint @@ -80,3 +82,17 @@ try { return new Response(e); } ``` + + diff --git a/src/content/docs/ai-gateway/providers/google-ai-studio.mdx b/src/content/docs/ai-gateway/providers/google-ai-studio.mdx index 028c77e4c8e7578..d46fad66db4cebd 100644 --- a/src/content/docs/ai-gateway/providers/google-ai-studio.mdx +++ b/src/content/docs/ai-gateway/providers/google-ai-studio.mdx @@ -3,6 +3,8 @@ title: Google AI Studio pcx_content_type: get-started --- +import { Render } from "~/components"; + [Google AI Studio](https://ai.google.dev/aistudio) helps you build quickly with Google Gemini models. ## Endpoint @@ -69,3 +71,17 @@ const model = genAI.getGenerativeModel( await model.generateContent(["What is Cloudflare?"]); ``` + + diff --git a/src/content/docs/ai-gateway/providers/grok.mdx b/src/content/docs/ai-gateway/providers/grok.mdx index a417e77cdd32002..7a774e03578a719 100644 --- a/src/content/docs/ai-gateway/providers/grok.mdx +++ b/src/content/docs/ai-gateway/providers/grok.mdx @@ -3,7 +3,9 @@ title: Grok pcx_content_type: get-started --- -[Grok](https://docs.x.ai/docs#getting-started) is a general purpose model that can be used for a variety of tasks, including generating and understanding text, code, and function calling. +import { Render } from "~/components"; + +[Grok](https://docs.x.ai/docs#getting-started) is a general purpose model that can be used for a variety of tasks, including generating and understanding text, code, and function calling. 
## Endpoint @@ -156,3 +158,17 @@ message = client.messages.create( print(message.content) ``` + + diff --git a/src/content/docs/ai-gateway/providers/groq.mdx b/src/content/docs/ai-gateway/providers/groq.mdx index 7dfd85c99160ce8..ea59d194a1badbc 100644 --- a/src/content/docs/ai-gateway/providers/groq.mdx +++ b/src/content/docs/ai-gateway/providers/groq.mdx @@ -3,6 +3,8 @@ title: Groq pcx_content_type: get-started --- +import { Render } from "~/components"; + [Groq](https://groq.com/) delivers high-speed processing and low-latency performance. ## Endpoint @@ -68,3 +70,17 @@ const chatCompletion = await groq.chat.completions.create({ model, }); ``` + + diff --git a/src/content/docs/ai-gateway/providers/index.mdx b/src/content/docs/ai-gateway/providers/index.mdx index aa20bdc7e8c68b4..232f3679e1abfd8 100644 --- a/src/content/docs/ai-gateway/providers/index.mdx +++ b/src/content/docs/ai-gateway/providers/index.mdx @@ -4,7 +4,7 @@ title: Model providers sidebar: group: hideIndex: true - order: 3 + order: 4 --- Here is a quick list of the providers we support: diff --git a/src/content/docs/ai-gateway/providers/mistral.mdx b/src/content/docs/ai-gateway/providers/mistral.mdx index 7c054394379137f..d377361d9f841ea 100644 --- a/src/content/docs/ai-gateway/providers/mistral.mdx +++ b/src/content/docs/ai-gateway/providers/mistral.mdx @@ -3,6 +3,8 @@ title: Mistral AI pcx_content_type: get-started --- +import { Render } from "~/components"; + [Mistral AI](https://mistral.ai) helps you build quickly with Mistral's advanced AI models. 
## Endpoint @@ -69,3 +71,17 @@ await client.chat.create({ ], }); ``` + + diff --git a/src/content/docs/ai-gateway/providers/perplexity.mdx b/src/content/docs/ai-gateway/providers/perplexity.mdx index d990938c3d1c42e..192c90fcd9c4abc 100644 --- a/src/content/docs/ai-gateway/providers/perplexity.mdx +++ b/src/content/docs/ai-gateway/providers/perplexity.mdx @@ -3,6 +3,8 @@ title: Perplexity pcx_content_type: get-started --- +import { Render } from "~/components"; + [Perplexity](https://www.perplexity.ai/) is an AI powered answer engine. ## Endpoint @@ -67,3 +69,17 @@ const chatCompletion = await perplexity.chat.completions.create({ max_tokens: maxTokens, }); ``` + + diff --git a/src/content/docs/ai-gateway/providers/workersai.mdx b/src/content/docs/ai-gateway/providers/workersai.mdx index 5d8cc4334f40979..f46d95ba5360583 100644 --- a/src/content/docs/ai-gateway/providers/workersai.mdx +++ b/src/content/docs/ai-gateway/providers/workersai.mdx @@ -117,3 +117,17 @@ Workers AI supports the following parameters for AI gateways: - Controls whether the request should [skip the cache](/ai-gateway/configuration/caching/#skip-cache-cf-aig-skip-cache). - `cacheTtl` number - Controls the [Cache TTL](/ai-gateway/configuration/caching/#cache-ttl-cf-aig-cache-ttl). + + diff --git a/src/content/docs/ai-gateway/websockets-api/index.mdx b/src/content/docs/ai-gateway/websockets-api/index.mdx index 7780bd024862098..f20672a6aece581 100644 --- a/src/content/docs/ai-gateway/websockets-api/index.mdx +++ b/src/content/docs/ai-gateway/websockets-api/index.mdx @@ -4,7 +4,7 @@ pcx_content_type: configuration sidebar: group: badge: Beta - order: 3 + order: 4 --- The AI Gateway WebSockets API provides a persistent connection for AI interactions, eliminating repeated handshakes and reducing latency. 
This API is divided into two categories: diff --git a/src/content/partials/ai-gateway/chat-completions-providers.mdx b/src/content/partials/ai-gateway/chat-completions-providers.mdx new file mode 100644 index 000000000000000..2ff0abfb9a64a2d --- /dev/null +++ b/src/content/partials/ai-gateway/chat-completions-providers.mdx @@ -0,0 +1,19 @@ +--- +params: + - name + - jsonexample +--- + +import { Code } from "~/components"; + +## OpenAI-Compatible Endpoint + +You can also use the [OpenAI-compatible endpoint](/ai-gateway/chat-completion/) (`/compat/chat/completions`) to access {props.name} models using the OpenAI API schema. To do so, send your requests to: + +```txt +https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}/compat/chat/completions
``` + +Specify: + 