diff --git a/docs/inference-providers/guides/building-first-app.md b/docs/inference-providers/guides/building-first-app.md
index 57c79ce45..628cc710b 100644
--- a/docs/inference-providers/guides/building-first-app.md
+++ b/docs/inference-providers/guides/building-first-app.md
@@ -7,6 +7,7 @@ This project demonstrates real-world AI orchestration using multiple specialized
## Project Overview
Our app will:
+
1. **Accept audio** as a microphone input through a web interface
2. **Transcribe speech** using a fast speech-to-text model
3. **Generate summaries** using a powerful language model
@@ -76,7 +77,7 @@ def process_meeting_audio(audio_file):
"""Process uploaded audio file and return transcript + summary"""
if audio_file is None:
return "Please upload an audio file.", ""
-
+
# We'll implement the AI logic next
return "Transcript will appear here...", "Summary will appear here..."
@@ -105,26 +106,26 @@ For JavaScript, we'll create a clean HTML interface with native file upload and
```html
+<h1>🎤 AI Meeting Notes</h1>
+
+<!-- element ids below are the ones referenced by the JavaScript that follows -->
+<label for="file">Upload audio file</label>
+<input type="file" id="file" accept="audio/*" />
+
+<div id="loading" hidden>Processing...</div>
+<div id="error" hidden></div>
+
+<div id="results" hidden>
+  <section>
+    <h3>📝 Transcript</h3>
+    <div id="transcript"></div>
+  </section>
+  <section>
+    <h3>📋 Summary</h3>
+    <div id="summary"></div>
+  </section>
+</div>
```
@@ -133,34 +134,34 @@ This creates a clean drag-and-drop interface with styled results sections for th
Our application can then use the `InferenceClient` from `huggingface.js` to call the transcription and summarization functions.
```javascript
-import { InferenceClient } from 'https://esm.sh/@huggingface/inference';
+import { InferenceClient } from "https://esm.sh/@huggingface/inference";
// Access the token from Hugging Face Spaces secrets
const HF_TOKEN = window.huggingface?.variables?.HF_TOKEN;
// Or if you're running locally, you can set it as an environment variable
// const HF_TOKEN = process.env.HF_TOKEN;
-document.getElementById('file').onchange = async (e) => {
- if (!e.target.files[0]) return;
-
- const file = e.target.files[0];
-
- show(document.getElementById('loading'));
- hide(document.getElementById('results'), document.getElementById('error'));
-
- try {
- const transcript = await transcribe(file);
- const summary = await summarize(transcript);
-
- document.getElementById('transcript').textContent = transcript;
- document.getElementById('summary').textContent = summary;
-
- hide(document.getElementById('loading'));
- show(document.getElementById('results'));
- } catch (error) {
- hide(document.getElementById('loading'));
- showError(`Error: ${error.message}`);
- }
+document.getElementById("file").onchange = async (e) => {
+ if (!e.target.files[0]) return;
+
+ const file = e.target.files[0];
+
+ show(document.getElementById("loading"));
+ hide(document.getElementById("results"), document.getElementById("error"));
+
+ try {
+ const transcript = await transcribe(file);
+ const summary = await summarize(transcript);
+
+ document.getElementById("transcript").textContent = transcript;
+ document.getElementById("summary").textContent = summary;
+
+ hide(document.getElementById("loading"));
+ show(document.getElementById("results"));
+ } catch (error) {
+ hide(document.getElementById("loading"));
+ showError(`Error: ${error.message}`);
+ }
};
```
@@ -183,17 +184,16 @@ Now let's implement the transcription using OpenAI's `whisper-large-v3` model fo
def transcribe_audio(audio_file_path):
"""Transcribe audio using fal.ai for speed"""
client = InferenceClient(provider="auto")
-
+
# Pass the file path directly - the client handles file reading
transcript = client.automatic_speech_recognition(
audio=audio_file_path,
model="openai/whisper-large-v3"
)
-
+
return transcript.text
```
-
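
The docstring mentions fal.ai while the snippet relies on automatic provider selection. If you want to pin transcription to fal.ai explicitly, a hedged variant looks like this (assuming fal.ai serves `openai/whisper-large-v3` for your account; the function name is illustrative):

```python
from huggingface_hub import InferenceClient

def transcribe_audio_with_falai(audio_file_path):
    """Transcribe audio, pinning the fal.ai provider instead of "auto"."""
    client = InferenceClient(provider="fal-ai")

    # Same call as above, only the provider selection differs
    transcript = client.automatic_speech_recognition(
        audio=audio_file_path,
        model="openai/whisper-large-v3",
    )
    return transcript.text
```
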
@@ -203,22 +203,21 @@ Now let's implement the transcription using OpenAI's `whisper-large-v3` model fo
> We'll use the `auto` provider to automatically select the first available provider for the model. You can define your own priority list of providers in the [Inference Providers](https://huggingface.co/settings/inference-providers) page.
```javascript
-import { InferenceClient } from 'https://esm.sh/@huggingface/inference';
+import { InferenceClient } from "https://esm.sh/@huggingface/inference";
async function transcribe(file) {
- const client = new InferenceClient(HF_TOKEN);
-
- const output = await client.automaticSpeechRecognition({
- data: file,
- model: "openai/whisper-large-v3-turbo",
- provider: "auto"
- });
-
- return output.text || output || 'Transcription completed';
+ const client = new InferenceClient(HF_TOKEN);
+
+ const output = await client.automaticSpeechRecognition({
+ data: file,
+ model: "openai/whisper-large-v3-turbo",
+ provider: "auto",
+ });
+
+ return output.text || output || "Transcription completed";
}
```
-
@@ -228,48 +227,49 @@ async function transcribe(file) {
Next, we'll use a powerful language model like `deepseek-ai/DeepSeek-R1-0528` from DeepSeek via an Inference Provider, and just like in the previous step, we'll use the `auto` provider to automatically select the first available provider for the model.
+We'll also use the `:fastest` policy to select the best-performing provider for this model.
We will define a custom prompt to ensure the output is formatted as a summary with action items and decisions made:
```python
def generate_summary(transcript):
"""Generate summary using an Inference Provider"""
client = InferenceClient(provider="auto")
-
+
prompt = f"""
Analyze this meeting transcript and provide:
1. A concise summary of key points
2. Action items with responsible parties
3. Important decisions made
-
+
Transcript: {transcript}
-
+
Format with clear sections:
## Summary
- ## Action Items
+ ## Action Items
## Decisions Made
"""
-
+
response = client.chat.completions.create(
- model="deepseek-ai/DeepSeek-R1-0528",
+ model="deepseek-ai/DeepSeek-R1-0528:fastest",
messages=[{"role": "user", "content": prompt}],
max_tokens=1000
)
-
+
return response.choices[0].message.content
```
-
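
For completeness, here is one way the earlier `process_meeting_audio` stub could compose these two functions (a simplified sketch; the guide's final version may add more error handling):

```python
def process_meeting_audio(audio_file):
    """Process uploaded audio file and return transcript + summary."""
    if audio_file is None:
        return "Please upload an audio file.", ""

    # Speech-to-text, then summarization with the chat model
    transcript = transcribe_audio(audio_file)
    summary = generate_summary(transcript)
    return transcript, summary
```
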
Next, we'll use a powerful language model like `deepseek-ai/DeepSeek-R1-0528` from DeepSeek via an Inference Provider, and just like in the previous step, we'll use the `auto` provider to automatically select the first available provider for the model.
+We'll also use the `:fastest` policy to select the best-performing provider for this model.
We will define a custom prompt to ensure the output is formatted as a summary with action items and decisions made:
```javascript
async function summarize(transcript) {
- const client = new InferenceClient(HF_TOKEN);
+ const client = new InferenceClient(HF_TOKEN);
- const prompt = `Analyze this meeting transcript and provide:
+ const prompt = `Analyze this meeting transcript and provide:
1. A concise summary of key points
2. Action items with responsible parties
3. Important decisions made
@@ -281,24 +281,30 @@ async function summarize(transcript) {
## Action Items
## Decisions Made`;
- const response = await client.chatCompletion({
- model: "deepseek-ai/DeepSeek-R1-0528",
- messages: [
- {
- role: "user",
- content: prompt
- }
- ],
- max_tokens: 1000
- }, {
- provider: "auto"
- });
-
- return response.choices?.[0]?.message?.content || response || 'No summary available';
+ const response = await client.chatCompletion(
+ {
+ model: "deepseek-ai/DeepSeek-R1-0528:fastest",
+ messages: [
+ {
+ role: "user",
+ content: prompt,
+ },
+ ],
+ max_tokens: 1000,
+ },
+ {
+ provider: "auto",
+ }
+ );
+
+ return (
+ response.choices?.[0]?.message?.content ||
+ response ||
+ "No summary available"
+ );
}
```
-
@@ -338,17 +344,17 @@ def generate_summary(transcript):
1. A concise summary of key points
2. Action items with responsible parties
3. Important decisions made
-
+
Transcript: {transcript}
-
+
Format with clear sections:
## Summary
- ## Action Items
+ ## Action Items
## Decisions Made
"""
response = client.chat.completions.create(
- model="deepseek-ai/DeepSeek-R1-0528",
+ model="deepseek-ai/DeepSeek-R1-0528:fastest",
messages=[{"role": "user", "content": prompt}],
max_tokens=1000,
)
@@ -417,100 +423,149 @@ For JavaScript deployment, create a simple static HTML file:
```html
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <title>🎤 AI Meeting Notes</title>
+  </head>
+  <body>
+    <h1>🎤 AI Meeting Notes</h1>
+
+    <!-- element ids match the ones used by the JavaScript from the earlier sections -->
+    <label for="file">Upload audio file</label>
+    <input type="file" id="file" accept="audio/*" />
+
+    <div id="loading" hidden>Processing...</div>
+    <div id="error" hidden></div>
+
+    <div id="results" hidden>
+      <section>
+        <h3>📝 Transcript</h3>
+        <div id="transcript"></div>
+      </section>
+      <section>
+        <h3>📋 Summary</h3>
+        <div id="summary"></div>
+      </section>
+    </div>
+
+    <script type="module">
+      // transcription and summarization logic from the previous sections goes here
+    </script>
+  </body>
+</html>
```
diff --git a/docs/inference-providers/index.md b/docs/inference-providers/index.md
index 090d9ebad..de0ccc87c 100644
--- a/docs/inference-providers/index.md
+++ b/docs/inference-providers/index.md
@@ -24,12 +24,12 @@ Our platform integrates with leading AI infrastructure providers, giving you acc
| [Nebius](./providers/nebius) | ✅ | ✅ | ✅ | ✅ | | |
| [Novita](./providers/novita) | ✅ | ✅ | | | ✅ | |
| [Nscale](./providers/nscale) | ✅ | ✅ | | ✅ | | |
-| [Public AI](./providers/publicai) | ✅ | | | | | |
+| [Public AI](./providers/publicai) | ✅ | | | | | |
| [Replicate](./providers/replicate) | | | | ✅ | ✅ | ✅ |
| [SambaNova](./providers/sambanova) | ✅ | | ✅ | | | |
-| [Scaleway](./providers/scaleway) | ✅ | | ✅ | | | |
+| [Scaleway](./providers/scaleway) | ✅ | | ✅ | | | |
| [Together](./providers/together) | ✅ | ✅ | | ✅ | | |
-| [Z.ai](./providers/zai-org) | ✅ | ✅ | | | | |
+| [Z.ai](./providers/zai-org) | ✅ | ✅ | | | | |
## Why Choose Inference Providers?
@@ -63,7 +63,7 @@ Here's what you can build:
Inference Providers works with your existing development workflow. Whether you prefer Python, JavaScript, or direct HTTP calls, we provide native SDKs and OpenAI-compatible APIs to get you up and running quickly.
-We'll walk through a practical example using [deepseek-ai/DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324), a state-of-the-art open-weights conversational model.
+We'll walk through a practical example using [openai/gpt-oss-120b](https://huggingface.co/openai/gpt-oss-120b), a state-of-the-art open-weights conversational model.
### Inference Playground
@@ -100,7 +100,13 @@ pip install huggingface_hub
hf auth login # get a read token from hf.co/settings/tokens
```
-You can now use the the client with a Python interpreter:
+You can now use the client with a Python interpreter.
+
+By default, our system automatically routes your request to the first available provider for the specified model, following your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers).
+
+You can change the provider selection policy by appending `:fastest` (selects the provider with highest throughput) or `:cheapest` (selects the provider with lowest price per output token) to the model id (e.g., `openai/gpt-oss-120b:fastest`).
+
+You can also select the provider of your choice by appending the provider name to the model id (e.g. `"openai/gpt-oss-120b:sambanova"`).
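
As a quick reference, the addressing forms described above look like this (the suffixed ids are illustrative; any provider name available for the model can be used):

```python
model_auto = "openai/gpt-oss-120b"              # first available provider, per your settings
model_fastest = "openai/gpt-oss-120b:fastest"   # highest-throughput provider
model_cheapest = "openai/gpt-oss-120b:cheapest" # lowest price per output token
model_pinned = "openai/gpt-oss-120b:sambanova"  # a specific provider
```
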
```python
import os
@@ -109,7 +115,7 @@ from huggingface_hub import InferenceClient
client = InferenceClient()
completion = client.chat.completions.create(
- model="deepseek-ai/DeepSeek-V3-0324",
+ model="openai/gpt-oss-120b",
messages=[
{
"role": "user",
@@ -127,7 +133,11 @@ print(completion.choices[0].message)
If you're already using OpenAI's Python client, then you need a **drop-in OpenAI replacement**. Just swap-out the base URL to instantly access hundreds of additional open-weights models through our provider network.
-Our system automatically routes your request to the most popular provider for the specified model. You can also select the provider of your choice by appending it to the model id (e.g. `"deepseek-ai/DeepSeek-V3-0324:sambanova"`).
+By default, our system automatically routes your request to the first available provider for the specified model, following your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers).
+
+You can change the provider selection policy by appending `:fastest` (selects the provider with highest throughput) or `:cheapest` (selects the provider with lowest price per output token) to the model id (e.g., `openai/gpt-oss-120b:fastest`).
+
+You can also select the provider of your choice by appending the provider name to the model id (e.g. `"openai/gpt-oss-120b:sambanova"`).
```python
import os
@@ -139,7 +149,7 @@ client = OpenAI(
)
completion = client.chat.completions.create(
- model="deepseek-ai/DeepSeek-V3-0324",
+ model="openai/gpt-oss-120b:fastest",
messages=[
{
"role": "user",
@@ -155,7 +165,11 @@ completion = client.chat.completions.create(
For maximum control and interoperability with custom frameworks, use our OpenAI-compatible REST API directly.
-Our routing system automatically selects the most popular available provider for your chosen model. You can also select the provider of your choice by appending it to the model id (e.g. `"deepseek-ai/DeepSeek-V3-0324:novita"`).
+By default, our system automatically routes your request to the first available provider for the specified model, following your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers).
+
+You can change the provider selection policy by appending `:fastest` (selects the provider with highest throughput) or `:cheapest` (selects the provider with lowest price per output token) to the model id (e.g., `openai/gpt-oss-120b:fastest`).
+
+You can also select the provider of your choice by appending the provider name to the model id (e.g. `"openai/gpt-oss-120b:sambanova"`).
```python
import os
@@ -170,7 +184,7 @@ payload = {
"content": "How many 'G's in 'huggingface'?"
}
],
- "model": "deepseek-ai/DeepSeek-V3-0324",
+ "model": "openai/gpt-oss-120b:fastest",
}
response = requests.post(API_URL, headers=headers, json=payload)
@@ -197,7 +211,13 @@ Install with NPM:
npm install @huggingface/inference
```
-Then use the client with Javascript:
+Then use the client with JavaScript.
+
+By default, our system automatically routes your request to the first available provider for the specified model, following your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers).
+
+You can change the provider selection policy by appending `:fastest` (selects the provider with highest throughput) or `:cheapest` (selects the provider with lowest price per output token) to the model id (e.g., `openai/gpt-oss-120b:fastest`).
+
+You can also select the provider of your choice by appending the provider name to the model id (e.g. `"openai/gpt-oss-120b:sambanova"`).
```js
import { InferenceClient } from "@huggingface/inference";
@@ -205,7 +225,7 @@ import { InferenceClient } from "@huggingface/inference";
const client = new InferenceClient(process.env.HF_TOKEN);
const chatCompletion = await client.chatCompletion({
- model: "deepseek-ai/DeepSeek-V3-0324",
+ model: "openai/gpt-oss-120b:fastest",
messages: [
{
role: "user",
@@ -221,7 +241,13 @@ console.log(chatCompletion.choices[0].message);
-If you're already using OpenAI's Javascript client, then you need a **drop-in OpenAI replacement**. Just swap-out the base URL to instantly access hundreds of additional open-weights models through our provider network. Our system automatically routes your request to the most popular provider for the specified model. You can also select the provider of your choice by appending it to the model id (e.g. `"deepseek-ai/DeepSeek-V3-0324:nebius"`).
+If you're already using OpenAI's JavaScript client, then you need a **drop-in OpenAI replacement**. Just swap out the base URL to instantly access hundreds of additional open-weights models through our provider network.
+
+By default, our system automatically routes your request to the first available provider for the specified model, following your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers).
+
+You can change the provider selection policy by appending `:fastest` (selects the provider with highest throughput) or `:cheapest` (selects the provider with lowest price per output token) to the model id (e.g., `openai/gpt-oss-120b:fastest`).
+
+You can also select the provider of your choice by appending the provider name to the model id (e.g. `"openai/gpt-oss-120b:sambanova"`).
```javascript
import OpenAI from "openai";
@@ -232,7 +258,7 @@ const client = new OpenAI({
});
const completion = await client.chat.completions.create({
- model: "deepseek-ai/DeepSeek-V3-0324",
+ model: "openai/gpt-oss-120b:fastest",
messages: [
{
role: "user",
@@ -250,7 +276,11 @@ console.log(completion.choices[0].message.content);
For lightweight applications or custom implementations, use our REST API directly with standard fetch.
-Our routing system automatically selects the most popular available provider for your chosen model. You can also select the provider of your choice by appending it to the model id (e.g. `"deepseek-ai/DeepSeek-V3-0324:fireworks-ai"`).
+By default, our system automatically routes your request to the first available provider for the specified model, following your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers).
+
+You can change the provider selection policy by appending `:fastest` (selects the provider with highest throughput) or `:cheapest` (selects the provider with lowest price per output token) to the model id (e.g., `openai/gpt-oss-120b:fastest`).
+
+You can also select the provider of your choice by appending the provider name to the model id (e.g. `"openai/gpt-oss-120b:sambanova"`).
```js
import fetch from "node-fetch";
@@ -264,7 +294,7 @@ const response = await fetch(
"Content-Type": "application/json",
},
body: JSON.stringify({
- model: "deepseek-ai/DeepSeek-V3-0324",
+ model: "openai/gpt-oss-120b:fastest",
messages: [
{
role: "user",
@@ -284,7 +314,12 @@ console.log(await response.json());
#### HTTP / cURL
For testing, debugging, or integrating with any HTTP client, here's the raw REST API format.
-Our routing system automatically selects the most popular available provider for your chosen model. You can also select the provider of your choice by appending it to the model id (e.g. `"deepseek-ai/DeepSeek-V3-0324:fireworks-ai"`).
+
+By default, our system automatically routes your request to the first available provider for the specified model, following your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers).
+
+You can change the provider selection policy by appending `:fastest` (selects the provider with highest throughput) or `:cheapest` (selects the provider with lowest price per output token) to the model id (e.g., `openai/gpt-oss-120b:fastest`).
+
+You can also select the provider of your choice by appending the provider name to the model id (e.g. `"openai/gpt-oss-120b:sambanova"`).
```bash
curl https://router.huggingface.co/v1/chat/completions \
@@ -297,7 +332,7 @@ curl https://router.huggingface.co/v1/chat/completions \
"content": "How many G in huggingface?"
}
],
- "model": "deepseek-ai/DeepSeek-V3-0324",
+ "model": "openai/gpt-oss-120b:fastest",
"stream": false
}'
```
@@ -375,14 +410,14 @@ const client = new InferenceClient(process.env.HF_TOKEN);
// Explicit provider selection
await client.chatCompletion({
- model: "meta-llama/Llama-3.1-8B-Instruct",
+ model: "deepseek-ai/DeepSeek-R1",
provider: "sambanova", // Specific provider
messages: [{ role: "user", content: "Hello!" }],
});
// Automatic provider selection (default: "auto")
await client.chatCompletion({
- model: "meta-llama/Llama-3.1-8B-Instruct",
+ model: "deepseek-ai/DeepSeek-R1",
// Defaults to "auto" selection of the provider
// provider="auto",
messages: [{ role: "user", content: "Hello!" }],
@@ -401,14 +436,14 @@ client = InferenceClient(token=os.environ["HF_TOKEN"])
# Explicit provider selection
result = client.chat_completion(
- model="meta-llama/Llama-3.1-8B-Instruct",
+ model="deepseek-ai/DeepSeek-R1",
provider="sambanova", # Specific provider
messages=[{"role": "user", "content": "Hello!"}],
)
# Automatic provider selection (default: "auto")
result = client.chat_completion(
- model="meta-llama/Llama-3.1-8B-Instruct",
+ model="deepseek-ai/DeepSeek-R1",
# Defaults to "auto" selection of the provider
# provider="auto",
messages=[{"role": "user", "content": "Hello!"}],
@@ -421,13 +456,19 @@ result = client.chat_completion(
**Provider Selection Policy:**
-- `provider: "auto"` (default): Selects the first available provider for the model, sorted by your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers)
-- `provider: "specific-provider"`: Forces use of a specific provider (e.g., "together", "replicate", "fal-ai", ...)
+- `provider: "auto"` (default): Selects the first available provider for the model, sorted by your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers).
+- `provider: "specific-provider"`: Forces use of a specific provider (e.g., "together", "replicate", "fal-ai", ...).
### Alternative: OpenAI-Compatible Chat Completions Endpoint (Chat Only)
If you prefer to work with familiar OpenAI APIs or want to migrate existing chat completion code with minimal changes, we offer a drop-in compatible endpoint that handles all provider selection automatically on the server side.
+By default, the selected provider is the first available one for the requested model, sorted by your preference order in [Inference Provider settings](https://hf.co/settings/inference-providers).
+You can change that policy by adding a suffix to the model name:
+
+- `:fastest` selects the fastest provider for the model (highest throughput in tokens per second)
+- `:cheapest` selects the most cost-efficient provider for the model (lowest price per output token)
+
**Note**: This OpenAI-compatible endpoint is currently available for chat completion tasks only. For other tasks like text-to-image, embeddings, or speech processing, use the Hugging Face inference clients shown above.
@@ -443,7 +484,7 @@ const client = new OpenAI({
});
const completion = await client.chat.completions.create({
- model: "meta-llama/Llama-3.1-8B-Instruct",
+ model: "deepseek-ai/DeepSeek-R1:fastest",
messages: [{ role: "user", content: "Hello!" }],
});
```
@@ -462,7 +503,7 @@ client = OpenAI(
)
completion = client.chat.completions.create(
- model="meta-llama/Llama-3.1-8B-Instruct",
+ model="deepseek-ai/DeepSeek-R1:fastest",
messages=[{"role": "user", "content": "Hello!"}],
)
@@ -480,7 +521,7 @@ curl https://router.huggingface.co/v1/chat/completions \
-H "Authorization: Bearer $HF_TOKEN" \
-H "Content-Type: application/json" \
-d '{
- "model": "meta-llama/Llama-3.1-8B-Instruct",
+ "model": "deepseek-ai/DeepSeek-R1:fastest",
"messages": [
{
"role": "user",