diff --git a/docs/api-inference/tasks/audio-classification.md b/docs/api-inference/tasks/audio-classification.md index 46e8d6b54..47144b754 100644 --- a/docs/api-inference/tasks/audio-classification.md +++ b/docs/api-inference/tasks/audio-classification.md @@ -29,7 +29,9 @@ For more details about the `audio-classification` task, check out its [dedicated ### Recommended models +- [speechbrain/google_speech_command_xvector](https://huggingface.co/speechbrain/google_speech_command_xvector): An easy-to-use model for command recognition. - [ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition](https://huggingface.co/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition): An emotion recognition model. +- [facebook/mms-lid-126](https://huggingface.co/facebook/mms-lid-126): A language identification model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=audio-classification&sort=trending). @@ -40,7 +42,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition \ +curl https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector \ -X POST \ --data-binary '@sample1.flac' \ -H 'Authorization: Bearer hf_***' @@ -51,14 +53,14 @@ curl https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en- ```py import requests -API_URL = "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() output = query("sample1.flac") ``` @@ -71,7 +73,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu async function query(filename) { const data = fs.readFileSync(filename); const response = await fetch( - "https://api-inference.huggingface.co/models/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition", + "https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/automatic-speech-recognition.md b/docs/api-inference/tasks/automatic-speech-recognition.md index c671d7f88..c28a10f14 100644 --- a/docs/api-inference/tasks/automatic-speech-recognition.md +++ b/docs/api-inference/tasks/automatic-speech-recognition.md @@ -30,7 +30,7 @@ For more details about the `automatic-speech-recognition` task, check out its [d ### Recommended models - [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3): A powerful ASR model by OpenAI. -- [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1): Powerful speaker diarization model. +- [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/seamless-m4t-v2-large): An end-to-end model that performs ASR and Speech Translation by MetaAI. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=automatic-speech-recognition&sort=trending). 
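The hunks that follow add a `huggingface.js` snippet for this task but keep the plain `requests` example on the Python side; a minimal `huggingface_hub` equivalent would look like the sketch below (the provider-aware client mirrors the other task pages; the file name is illustrative).

```py
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key="hf_***",
)

# Transcribe a local audio file with the recommended Whisper checkpoint.
# "sample1.flac" is the placeholder file name used throughout these docs.
output = client.automatic_speech_recognition(
    "sample1.flac",
    model="openai/whisper-large-v3",
)
print(output.text)  # the recognized text, per the output schema below
```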
@@ -41,7 +41,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/openai/whisper-large-v3 \ +curl https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3 \ -X POST \ --data-binary '@sample1.flac' \ -H 'Authorization: Bearer hf_***' @@ -52,14 +52,14 @@ curl https://api-inference.huggingface.co/models/openai/whisper-large-v3 \ ```py import requests -API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() output = query("sample1.flac") ``` @@ -68,11 +68,30 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const data = fs.readFileSync("sample1.flac"); + +const output = await client.automaticSpeechRecognition({ + data, + model: "openai/whisper-large-v3", + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(filename) { const data = fs.readFileSync(filename); const response = await fetch( - "https://api-inference.huggingface.co/models/openai/whisper-large-v3", + "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3", { headers: { Authorization: "Bearer hf_***", @@ -143,5 +162,5 @@ For more information about Inference API headers, check out the parameters [guid | **text** | _string_ | The recognized text. | | **chunks** | _object[]_ | When returnTimestamps is enabled, chunks contains a list of audio chunks identified by the model. | | **        text** | _string_ | A chunk of text identified by the model | -| **        timestamps** | _number[]_ | The start and end timestamps corresponding with the text | +| **        timestamp** | _number[]_ | The start and end timestamps corresponding with the text | diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md index a3233b92c..ef9a66fa9 100644 --- a/docs/api-inference/tasks/chat-completion.md +++ b/docs/api-inference/tasks/chat-completion.md @@ -22,15 +22,15 @@ This is a subtask of [`text-generation`](https://huggingface.co/docs/api-inferen #### Conversational Large Language Models (LLMs) - [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. +- [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models. - [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. - [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft. - [PowerInfer/SmallThinker-3B-Preview](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview): A very powerful model with reasoning capabilities. -- [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct): Strong text generation model to follow instructions. 
- [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code. +- [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model. #### Conversational Vision-Language Models (VLMs) -- [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct): Strong image-text-to-text model. - [Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview): Image-text-to-text model with reasoning capabilities. ### API Playground @@ -63,7 +63,7 @@ The API supports: ```bash -curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \ +curl 'https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1/chat/completions' \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ --data '{ @@ -85,7 +85,10 @@ Using `huggingface_hub`: ```py from huggingface_hub import InferenceClient -client = InferenceClient(api_key="hf_***") +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) messages = [ { @@ -95,7 +98,7 @@ messages = [ ] stream = client.chat.completions.create( - model="google/gemma-2-2b-it", + model="google/gemma-2-2b-it", messages=messages, max_tokens=500, stream=True @@ -110,7 +113,7 @@ Using `openai`: from openai import OpenAI client = OpenAI( - base_url="https://api-inference.huggingface.co/v1/", + base_url="https://router.huggingface.co/hf-inference/v1", api_key="hf_***" ) @@ -129,7 +132,7 @@ stream = client.chat.completions.create( ) for chunk in stream: - print(chunk.choices[0].delta.content, end="") + print(chunk.choices[0].delta.content, end="") ``` To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). @@ -152,7 +155,8 @@ const stream = client.chatCompletionStream({ content: "What is the capital of France?" 
} ], - max_tokens: 500 + provider: "hf-inference", + max_tokens: 500, }); for await (const chunk of stream) { @@ -169,8 +173,8 @@ Using `openai`: import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://api-inference.huggingface.co/v1/", - apiKey: "hf_***" + baseURL: "https://router.huggingface.co/hf-inference/v1", + apiKey: "hf_***" }); let out = ""; @@ -210,11 +214,11 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ ```bash -curl 'https://api-inference.huggingface.co/models/Qwen/Qwen2-VL-7B-Instruct/v1/chat/completions' \ +curl 'https://router.huggingface.co/hf-inference/models/Qwen/QVQ-72B-Preview/v1/chat/completions' \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ --data '{ - "model": "Qwen/Qwen2-VL-7B-Instruct", + "model": "Qwen/QVQ-72B-Preview", "messages": [ { "role": "user", @@ -243,7 +247,10 @@ Using `huggingface_hub`: ```py from huggingface_hub import InferenceClient -client = InferenceClient(api_key="hf_***") +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) messages = [ { @@ -264,7 +271,7 @@ messages = [ ] stream = client.chat.completions.create( - model="Qwen/Qwen2-VL-7B-Instruct", + model="Qwen/QVQ-72B-Preview", messages=messages, max_tokens=500, stream=True @@ -279,7 +286,7 @@ Using `openai`: from openai import OpenAI client = OpenAI( - base_url="https://api-inference.huggingface.co/v1/", + base_url="https://router.huggingface.co/hf-inference/v1", api_key="hf_***" ) @@ -302,14 +309,14 @@ messages = [ ] stream = client.chat.completions.create( - model="Qwen/Qwen2-VL-7B-Instruct", + model="Qwen/QVQ-72B-Preview", messages=messages, max_tokens=500, stream=True ) for chunk in stream: - print(chunk.choices[0].delta.content, end="") + print(chunk.choices[0].delta.content, end="") ``` To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). @@ -325,7 +332,7 @@ const client = new HfInference("hf_***"); let out = ""; const stream = client.chatCompletionStream({ - model: "Qwen/Qwen2-VL-7B-Instruct", + model: "Qwen/QVQ-72B-Preview", messages: [ { role: "user", @@ -343,7 +350,8 @@ const stream = client.chatCompletionStream({ ] } ], - max_tokens: 500 + provider: "hf-inference", + max_tokens: 500, }); for await (const chunk of stream) { @@ -360,14 +368,14 @@ Using `openai`: import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://api-inference.huggingface.co/v1/", - apiKey: "hf_***" + baseURL: "https://router.huggingface.co/hf-inference/v1", + apiKey: "hf_***" }); let out = ""; const stream = await client.chat.completions.create({ - model: "Qwen/Qwen2-VL-7B-Instruct", + model: "Qwen/QVQ-72B-Preview", messages: [ { role: "user", @@ -415,18 +423,29 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ | **logprobs** | _boolean_ | Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. | | **max_tokens** | _integer_ | The maximum number of tokens that can be generated in the chat completion. | | **messages*** | _object[]_ | A list of messages comprising the conversation so far. 
| -| **        content*** | _unknown_ | One of the following: | -| **                 (#1)** | _string_ | | -| **                 (#2)** | _object[]_ | | -| **                         (#1)** | _object_ | | -| **                                text*** | _string_ | | -| **                                type*** | _enum_ | Possible values: text. | -| **                         (#2)** | _object_ | | -| **                                image_url*** | _object_ | | -| **                                        url*** | _string_ | | -| **                                type*** | _enum_ | Possible values: image_url. | -| **        name** | _string_ | | -| **        role*** | _string_ | | +| **         (#1)** | _unknown_ | One of the following: | +| **                 (#1)** | _object_ | | +| **                        content*** | _unknown_ | One of the following: | +| **                                 (#1)** | _string_ | | +| **                                 (#2)** | _object[]_ | | +| **                                         (#1)** | _object_ | | +| **                                                text*** | _string_ | | +| **                                                type*** | _enum_ | Possible values: text. | +| **                                         (#2)** | _object_ | | +| **                                                image_url*** | _object_ | | +| **                                                        url*** | _string_ | | +| **                                                type*** | _enum_ | Possible values: image_url. | +| **                 (#2)** | _object_ | | +| **                        tool_calls*** | _object[]_ | | +| **                                function*** | _object_ | | +| **                                        arguments*** | _unknown_ | | +| **                                        description** | _string_ | | +| **                                        name*** | _string_ | | +| **                                id*** | _string_ | | +| **                                type*** | _string_ | | +| **         (#2)** | _object_ | | +| **                name** | _string_ | | +| **                role*** | _string_ | | | **presence_penalty** | _number_ | Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics | | **response_format** | _unknown_ | One of the following: | | **         (#1)** | _object_ | | @@ -490,6 +509,7 @@ If `stream` is `false` (default), the response will be a JSON object with the fo | **                 (#1)** | _object_ | | | **                        content** | _string_ | | | **                        role** | _string_ | | +| **                        tool_call_id** | _string_ | | | **                 (#2)** | _object_ | | | **                        role** | _string_ | | | **                        tool_calls** | _object[]_ | | @@ -519,6 +539,7 @@ For more information about streaming, check out [this guide](https://huggingface | **                 (#1)** | _object_ | | | **                        content** | _string_ | | | **                        role** | _string_ | | +| **                        tool_call_id** | _string_ | | | **                 (#2)** | _object_ | | | **                        role** | _string_ | | | **                        tool_calls** | _object_ | | diff --git a/docs/api-inference/tasks/feature-extraction.md b/docs/api-inference/tasks/feature-extraction.md index 29d3ca441..fc707ad85 100644 --- a/docs/api-inference/tasks/feature-extraction.md +++ b/docs/api-inference/tasks/feature-extraction.md @@ -40,7 +40,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/thenlper/gte-large \ +curl https://router.huggingface.co/hf-inference/models/thenlper/gte-large \ -X POST \ -d '{"inputs": "Today is a sunny day and I will get some ice cream."}' \ -H 'Content-Type: application/json' \ @@ -49,10 +49,30 @@ curl https://api-inference.huggingface.co/models/thenlper/gte-large \ +Using `huggingface_hub`: +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) + +result = client.feature_extraction( + model="thenlper/gte-large", + inputs="Today is a sunny day and I will get some ice cream.", + provider="hf-inference", +) + +print(result) + +``` + +Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/thenlper/gte-large" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -68,10 +88,27 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const output = await client.featureExtraction({ + model: "thenlper/gte-large", + inputs: "Today is a sunny day and I will get some ice cream.", + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/thenlper/gte-large", + "https://router.huggingface.co/hf-inference/models/thenlper/gte-large", { headers: { Authorization: "Bearer hf_***", @@ -103,7 +140,9 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ | Payload | | | | :--- | :--- | :--- | -| **inputs*** | _string_ | The text to embed. 
| +| **inputs*** | _unknown_ | One of the following: | +| **         (#1)** | _string_ | | +| **         (#2)** | _string[]_ | | | **normalize** | _boolean_ | | | **prompt_name** | _string_ | The name of the prompt that should be used by for encoding. If not set, no prompt will be applied. Must be a key in the `sentence-transformers` configuration `prompts` dictionary. For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?" because the prompt text will be prepended before any text to encode. | | **truncate** | _boolean_ | | diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md index d4f6a5b97..70e7c256c 100644 --- a/docs/api-inference/tasks/fill-mask.md +++ b/docs/api-inference/tasks/fill-mask.md @@ -35,7 +35,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/FacebookAI/xlm-roberta-base \ +curl https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base \ -X POST \ -d '{"inputs": "The answer to the universe is [MASK]."}' \ -H 'Content-Type: application/json' \ @@ -44,10 +44,30 @@ curl https://api-inference.huggingface.co/models/FacebookAI/xlm-roberta-base \ +Using `huggingface_hub`: +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) + +result = client.fill_mask( + model="FacebookAI/xlm-roberta-base", + inputs="The answer to the universe is [MASK].", + provider="hf-inference", +) + +print(result) + +``` + +Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/FacebookAI/xlm-roberta-base" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -63,10 +83,27 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const output = await client.fillMask({ + model: "FacebookAI/xlm-roberta-base", + inputs: "The answer to the universe is [MASK].", + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/FacebookAI/xlm-roberta-base", + "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md index 262876a48..b65bb686a 100644 --- a/docs/api-inference/tasks/image-classification.md +++ b/docs/api-inference/tasks/image-classification.md @@ -36,7 +36,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/google/vit-base-patch16-224 \ +curl https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224 \ -X POST \ --data-binary '@cats.jpg' \ -H 'Authorization: Bearer hf_***' @@ -47,14 +47,14 @@ curl https://api-inference.huggingface.co/models/google/vit-base-patch16-224 \ ```py import requests -API_URL = "https://api-inference.huggingface.co/models/google/vit-base-patch16-224" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = 
{"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() output = query("cats.jpg") ``` @@ -67,7 +67,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu async function query(filename) { const data = fs.readFileSync(filename); const response = await fetch( - "https://api-inference.huggingface.co/models/google/vit-base-patch16-224", + "https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md index b60e81e62..459b5037d 100644 --- a/docs/api-inference/tasks/image-segmentation.md +++ b/docs/api-inference/tasks/image-segmentation.md @@ -36,7 +36,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small \ +curl https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small \ -X POST \ --data-binary '@cats.jpg' \ -H 'Authorization: Bearer hf_***' @@ -47,14 +47,14 @@ curl https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-smal ```py import requests -API_URL = "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() output = query("cats.jpg") ``` @@ -67,7 +67,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu async function query(filename) { const data = fs.readFileSync(filename); const response = await fetch( - "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small", + "https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/image-text-to-text.md b/docs/api-inference/tasks/image-text-to-text.md index 75010bf82..ae4f7e7d9 100644 --- a/docs/api-inference/tasks/image-text-to-text.md +++ b/docs/api-inference/tasks/image-text-to-text.md @@ -24,7 +24,6 @@ For more details about the `image-text-to-text` task, check out its [dedicated p ### Recommended models -- [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct): Strong image-text-to-text model. - [Qwen/QVQ-72B-Preview](https://huggingface.co/Qwen/QVQ-72B-Preview): Image-text-to-text model with reasoning capabilities. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-text-to-text&sort=trending). 
@@ -36,7 +35,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/Qwen/Qwen2-VL-7B-Instruct \ +curl https://router.huggingface.co/hf-inference/models/Qwen/QVQ-72B-Preview \ -X POST \ -d '{"inputs": "Can you please let us know more details about your "}' \ -H 'Content-Type: application/json' \ @@ -49,12 +48,15 @@ Using `huggingface_hub`: ```py from huggingface_hub import InferenceClient -client = InferenceClient(api_key="hf_***") +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) messages = "\"Can you please let us know more details about your \"" stream = client.chat.completions.create( - model="Qwen/Qwen2-VL-7B-Instruct", + model="Qwen/QVQ-72B-Preview", messages=messages, max_tokens=500, stream=True @@ -69,21 +71,21 @@ Using `openai`: from openai import OpenAI client = OpenAI( - base_url="https://api-inference.huggingface.co/v1/", + base_url="https://router.huggingface.co/hf-inference/v1", api_key="hf_***" ) messages = "\"Can you please let us know more details about your \"" stream = client.chat.completions.create( - model="Qwen/Qwen2-VL-7B-Instruct", + model="Qwen/QVQ-72B-Preview", messages=messages, max_tokens=500, stream=True ) for chunk in stream: - print(chunk.choices[0].delta.content, end="") + print(chunk.choices[0].delta.content, end="") ``` To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_text_to_text). @@ -93,7 +95,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/Qwen/Qwen2-VL-7B-Instruct", + "https://router.huggingface.co/hf-inference/models/Qwen/QVQ-72B-Preview", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/object-detection.md b/docs/api-inference/tasks/object-detection.md index a1ca592fa..b8fde8d08 100644 --- a/docs/api-inference/tasks/object-detection.md +++ b/docs/api-inference/tasks/object-detection.md @@ -35,7 +35,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/facebook/detr-resnet-50 \ +curl https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50 \ -X POST \ --data-binary '@cats.jpg' \ -H 'Authorization: Bearer hf_***' @@ -46,14 +46,14 @@ curl https://api-inference.huggingface.co/models/facebook/detr-resnet-50 \ ```py import requests -API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() output = query("cats.jpg") ``` @@ -66,7 +66,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu async function query(filename) { const data = fs.readFileSync(filename); const response = await fetch( - "https://api-inference.huggingface.co/models/facebook/detr-resnet-50", + "https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50", { headers: { Authorization: "Bearer 
hf_***", diff --git a/docs/api-inference/tasks/question-answering.md b/docs/api-inference/tasks/question-answering.md index 0222d8549..0cca700b3 100644 --- a/docs/api-inference/tasks/question-answering.md +++ b/docs/api-inference/tasks/question-answering.md @@ -26,6 +26,7 @@ For more details about the `question-answering` task, check out its [dedicated p - [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2): A robust baseline model for most question answering domains. - [distilbert/distilbert-base-cased-distilled-squad](https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad): Small yet robust model that can answer questions. +- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A special model that can answer questions from tables. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=question-answering&sort=trending). @@ -36,7 +37,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/deepset/roberta-base-squad2 \ +curl https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2 \ -X POST \ -d '{"inputs": { "question": "What is my name?", "context": "My name is Clara and I live in Berkeley." }}' \ -H 'Content-Type: application/json' \ @@ -45,10 +46,33 @@ curl https://api-inference.huggingface.co/models/deepset/roberta-base-squad2 \ +Using `huggingface_hub`: +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) + +result = client.question_answering( + model="deepset/roberta-base-squad2", + inputs={ + "question": "What is my name?", + "context": "My name is Clara and I live in Berkeley." +}, + provider="hf-inference", +) + +print(result) + +``` + +Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -67,10 +91,30 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const output = await client.questionAnswering({ + model: "deepset/roberta-base-squad2", + inputs: { + "question": "What is my name?", + "context": "My name is Clara and I live in Berkeley." +}, + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2", + "https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/summarization.md b/docs/api-inference/tasks/summarization.md index b3ac11d4c..b55dfac70 100644 --- a/docs/api-inference/tasks/summarization.md +++ b/docs/api-inference/tasks/summarization.md @@ -25,6 +25,7 @@ For more details about the `summarization` task, check out its [dedicated page]( ### Recommended models - [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn): A strong summarization model trained on English news articles. Excels at generating factual summaries. 
+- [Falconsai/medical_summarization](https://huggingface.co/Falconsai/medical_summarization): A summarization model trained on medical articles. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=summarization&sort=trending). @@ -35,7 +36,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/facebook/bart-large-cnn \ +curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn \ -X POST \ -d '{"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."}' \ -H 'Content-Type: application/json' \ @@ -44,10 +45,30 @@ curl https://api-inference.huggingface.co/models/facebook/bart-large-cnn \ +Using `huggingface_hub`: +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) + +result = client.summarization( + model="facebook/bart-large-cnn", + inputs="The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", + provider="hf-inference", +) + +print(result) + +``` + +Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -63,10 +84,27 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const output = await client.summarization({ + model: "facebook/bart-large-cnn", + inputs: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. 
It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/facebook/bart-large-cnn", + "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/table-question-answering.md b/docs/api-inference/tasks/table-question-answering.md index 6d1c99bfb..8004ab9b0 100644 --- a/docs/api-inference/tasks/table-question-answering.md +++ b/docs/api-inference/tasks/table-question-answering.md @@ -24,6 +24,8 @@ For more details about the `table-question-answering` task, check out its [dedic ### Recommended models +- [microsoft/tapex-base](https://huggingface.co/microsoft/tapex-base): A table question answering model that is capable of neural SQL execution, i.e., employ TAPEX to execute a SQL query on a given table. +- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A robust table question answering model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=table-question-answering&sort=trending). @@ -34,7 +36,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/ \ +curl https://router.huggingface.co/hf-inference/models/microsoft/tapex-base \ -X POST \ -d '{"inputs": { "query": "How many stars does the transformers repository have?", "table": { "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": [ "Python", "Python", "Rust, Python and NodeJS" ] } }}' \ -H 'Content-Type: application/json' \ @@ -43,10 +45,42 @@ curl https://api-inference.huggingface.co/models/ \ +Using `huggingface_hub`: +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) + +result = client.table_question_answering( + model="microsoft/tapex-base", + inputs={ + "query": "How many stars does the transformers repository have?", + "table": { + "Repository": ["Transformers", "Datasets", "Tokenizers"], + "Stars": ["36542", "4512", "3934"], + "Contributors": ["651", "77", "34"], + "Programming language": [ + "Python", + "Python", + "Rust, Python and NodeJS" + ] + } +}, + provider="hf-inference", +) + +print(result) + +``` + +Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -74,10 +108,39 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const output = await client.tableQuestionAnswering({ + model: "microsoft/tapex-base", + inputs: { + "query": "How many stars does the transformers repository have?", + "table": { + "Repository": ["Transformers", "Datasets", 
"Tokenizers"], + "Stars": ["36542", "4512", "3934"], + "Contributors": ["651", "77", "34"], + "Programming language": [ + "Python", + "Python", + "Rust, Python and NodeJS" + ] + } +}, + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/", + "https://router.huggingface.co/hf-inference/models/microsoft/tapex-base", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/text-classification.md b/docs/api-inference/tasks/text-classification.md index 83cf43653..ed2458c0c 100644 --- a/docs/api-inference/tasks/text-classification.md +++ b/docs/api-inference/tasks/text-classification.md @@ -28,6 +28,7 @@ For more details about the `text-classification` task, check out its [dedicated - [ProsusAI/finbert](https://huggingface.co/ProsusAI/finbert): A sentiment analysis model specialized in financial sentiment. - [cardiffnlp/twitter-roberta-base-sentiment-latest](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest): A sentiment analysis model specialized in analyzing tweets. - [papluca/xlm-roberta-base-language-detection](https://huggingface.co/papluca/xlm-roberta-base-language-detection): A model that can classify languages. +- [meta-llama/Prompt-Guard-86M](https://huggingface.co/meta-llama/Prompt-Guard-86M): A model that can classify text generation attacks. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-classification&sort=trending). @@ -38,7 +39,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english \ +curl https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english \ -X POST \ -d '{"inputs": "I like you. I love you"}' \ -H 'Content-Type: application/json' \ @@ -47,10 +48,30 @@ curl https://api-inference.huggingface.co/models/distilbert/distilbert-base-unca +Using `huggingface_hub`: +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) + +result = client.text_classification( + model="distilbert/distilbert-base-uncased-finetuned-sst-2-english", + inputs="I like you. I love you", + provider="hf-inference", +) + +print(result) + +``` + +Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -66,10 +87,27 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const output = await client.textClassification({ + model: "distilbert/distilbert-base-uncased-finetuned-sst-2-english", + inputs: "I like you. 
I love you", + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english", + "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/text-generation.md b/docs/api-inference/tasks/text-generation.md index 2271a2119..dd9b5e2cf 100644 --- a/docs/api-inference/tasks/text-generation.md +++ b/docs/api-inference/tasks/text-generation.md @@ -27,11 +27,12 @@ For more details about the `text-generation` task, check out its [dedicated page ### Recommended models - [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. +- [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models. - [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. - [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft. - [PowerInfer/SmallThinker-3B-Preview](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview): A very powerful model with reasoning capabilities. -- [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct): Strong text generation model to follow instructions. - [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code. +- [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending). 
@@ -42,7 +43,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/google/gemma-2-2b-it \ +curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it \ -X POST \ -d '{"inputs": "Can you please let us know more details about your "}' \ -H 'Content-Type: application/json' \ @@ -51,10 +52,30 @@ curl https://api-inference.huggingface.co/models/google/gemma-2-2b-it \ +Using `huggingface_hub`: +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) + +result = client.text_generation( + model="google/gemma-2-2b-it", + inputs="Can you please let us know more details about your ", + provider="hf-inference", +) + +print(result) + +``` + +Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-2b-it" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -70,10 +91,27 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const output = await client.textGeneration({ + model: "google/gemma-2-2b-it", + inputs: "Can you please let us know more details about your ", + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/google/gemma-2-2b-it", + "https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/text-to-image.md b/docs/api-inference/tasks/text-to-image.md index a23db455f..71bb3b141 100644 --- a/docs/api-inference/tasks/text-to-image.md +++ b/docs/api-inference/tasks/text-to-image.md @@ -25,6 +25,7 @@ For more details about the `text-to-image` task, check out its [dedicated page]( ### Recommended models - [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev): One of the most powerful image generation models that can generate realistic outputs. +- [Kwai-Kolors/Kolors](https://huggingface.co/Kwai-Kolors/Kolors): Text-to-image model for photorealistic generation. - [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers): A powerful text-to-image model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-to-image&sort=trending). 
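The parameters table further below replaces the nested `target_size` object with top-level `width` and `height` fields; with `huggingface_hub` these map onto keyword arguments of `text_to_image`, roughly as sketched here (the pixel values and step/scale settings are illustrative).

```py
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key="hf_***",
)

# width/height replace the old target_size object; the values here are illustrative.
image = client.text_to_image(
    "Astronaut riding a horse",
    model="black-forest-labs/FLUX.1-dev",
    width=768,
    height=768,
    num_inference_steps=30,
    guidance_scale=3.5,
)
image.save("astronaut.png")  # output is a PIL.Image object
```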
@@ -36,7 +37,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev \ +curl https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev \ -X POST \ -d '{"inputs": "Astronaut riding a horse"}' \ -H 'Content-Type: application/json' \ @@ -48,22 +49,30 @@ curl https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev \ Using `huggingface_hub`: ```py from huggingface_hub import InferenceClient -client = InferenceClient("black-forest-labs/FLUX.1-dev", token="hf_***") + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) # output is a PIL.Image object -image = client.text_to_image("Astronaut riding a horse") +image = client.text_to_image( + "Astronaut riding a horse", + model="black-forest-labs/FLUX.1-dev" +) ``` Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): response = requests.post(API_URL, headers=headers, json=payload) return response.content + image_bytes = query({ "inputs": "Astronaut riding a horse", }) @@ -78,10 +87,27 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const image = await client.textToImage({ + model: "black-forest-labs/FLUX.1-dev", + inputs: "Astronaut riding a horse", + parameters: { num_inference_steps: 5 }, + provider: "hf-inference", +}); +/// Use the generated image (it's a Blob) + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev", + "https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev", { headers: { Authorization: "Bearer hf_***", @@ -117,9 +143,8 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ | **        guidance_scale** | _number_ | A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts. | | **        negative_prompt** | _string_ | One prompt to guide what NOT to include in image generation. | | **        num_inference_steps** | _integer_ | The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | -| **        target_size** | _object_ | The size in pixel of the output image | -| **                width*** | _integer_ | | -| **                height*** | _integer_ | | +| **        width** | _integer_ | The width in pixels of the output image | +| **        height** | _integer_ | The height in pixels of the output image | | **        scheduler** | _string_ | Override the scheduler with a compatible one. | | **        seed** | _integer_ | Seed for the random number generator. 
| diff --git a/docs/api-inference/tasks/token-classification.md b/docs/api-inference/tasks/token-classification.md index 5925cce6c..eeecde32c 100644 --- a/docs/api-inference/tasks/token-classification.md +++ b/docs/api-inference/tasks/token-classification.md @@ -38,7 +38,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/dslim/bert-base-NER \ +curl https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER \ -X POST \ -d '{"inputs": "My name is Sarah Jessica Parker but you can call me Jessica"}' \ -H 'Content-Type: application/json' \ @@ -47,10 +47,30 @@ curl https://api-inference.huggingface.co/models/dslim/bert-base-NER \ +Using `huggingface_hub`: +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) + +result = client.token_classification( + model="dslim/bert-base-NER", + inputs="My name is Sarah Jessica Parker but you can call me Jessica", + provider="hf-inference", +) + +print(result) + +``` + +Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/dslim/bert-base-NER" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -66,10 +86,27 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const output = await client.tokenClassification({ + model: "dslim/bert-base-NER", + inputs: "My name is Sarah Jessica Parker but you can call me Jessica", + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/dslim/bert-base-NER", + "https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/translation.md b/docs/api-inference/tasks/translation.md index aa3910e5a..bc4939d7b 100644 --- a/docs/api-inference/tasks/translation.md +++ b/docs/api-inference/tasks/translation.md @@ -24,6 +24,7 @@ For more details about the `translation` task, check out its [dedicated page](ht ### Recommended models +- [facebook/nllb-200-1.3B](https://huggingface.co/facebook/nllb-200-1.3B): Very powerful model that can translate many languages between each other, especially low-resource languages. - [google-t5/t5-base](https://huggingface.co/google-t5/t5-base): A general-purpose Transformer that can be used to translate from English to German, French, or Romanian. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=translation&sort=trending). 
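The newly recommended NLLB checkpoint translates between many language pairs, so the source and target languages need to be spelled out; with `huggingface_hub` that is done via `src_lang`/`tgt_lang`, as in the sketch below (the FLORES-200 codes, Russian to English, are illustrative).

```py
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key="hf_***",
)

# NLLB expects FLORES-200 language codes; rus_Cyrl -> eng_Latn is an illustrative pair.
result = client.translation(
    "Меня зовут Вольфганг и я живу в Берлине",
    model="facebook/nllb-200-1.3B",
    src_lang="rus_Cyrl",
    tgt_lang="eng_Latn",
)
print(result.translation_text)
```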
@@ -35,7 +36,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/google-t5/t5-base \ +curl https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B \ -X POST \ -d '{"inputs": "Меня зовут Вольфганг и я живу в Берлине"}' \ -H 'Content-Type: application/json' \ @@ -44,10 +45,30 @@ curl https://api-inference.huggingface.co/models/google-t5/t5-base \ +Using `huggingface_hub`: +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***" +) + +result = client.translation( + model="facebook/nllb-200-1.3B", + inputs="Меня зовут Вольфганг и я живу в Берлине", + provider="hf-inference", +) + +print(result) + +``` + +Using `requests`: ```py import requests -API_URL = "https://api-inference.huggingface.co/models/google-t5/t5-base" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -63,10 +84,27 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu +Using `huggingface.js`: +```js +import { HfInference } from "@huggingface/inference"; + +const client = new HfInference("hf_***"); + +const output = await client.translation({ + model: "facebook/nllb-200-1.3B", + inputs: "Меня зовут Вольфганг и я живу в Берлине", + provider: "hf-inference", +}); + +console.log(output); + +``` + +Using `fetch`: ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/google-t5/t5-base", + "https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B", { headers: { Authorization: "Bearer hf_***", diff --git a/docs/api-inference/tasks/zero-shot-classification.md b/docs/api-inference/tasks/zero-shot-classification.md index a20d5bc9f..0b0d3a1a0 100644 --- a/docs/api-inference/tasks/zero-shot-classification.md +++ b/docs/api-inference/tasks/zero-shot-classification.md @@ -35,7 +35,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/facebook/bart-large-mnli \ +curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli \ -X POST \ -d '{"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \ -H 'Content-Type: application/json' \ @@ -47,7 +47,7 @@ curl https://api-inference.huggingface.co/models/facebook/bart-large-mnli \ ```py import requests -API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-mnli" +API_URL = "https://router.huggingface.co/hf-inference/v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -66,24 +66,24 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu ```js async function query(data) { - const response = await fetch( - "https://api-inference.huggingface.co/models/facebook/bart-large-mnli", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), + const response = await fetch( + "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; } - 
); - const result = await response.json(); - return result; -} - -query({"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}).then((response) => { - console.log(JSON.stringify(response)); -}); + + query({"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}).then((response) => { + console.log(JSON.stringify(response)); + }); ``` To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#zeroshotclassification). diff --git a/scripts/api-inference/package.json b/scripts/api-inference/package.json index 37ce4b17a..db8c3036e 100644 --- a/scripts/api-inference/package.json +++ b/scripts/api-inference/package.json @@ -14,7 +14,8 @@ "author": "", "license": "ISC", "dependencies": { - "@huggingface/tasks": "^0.14.0", + "@huggingface/inference": "^3.5.0", + "@huggingface/tasks": "^0.17.0", "@types/node": "^22.5.0", "handlebars": "^4.7.8", "node": "^20.17.0", diff --git a/scripts/api-inference/pnpm-lock.yaml b/scripts/api-inference/pnpm-lock.yaml index 898461241..59e335c81 100644 --- a/scripts/api-inference/pnpm-lock.yaml +++ b/scripts/api-inference/pnpm-lock.yaml @@ -8,9 +8,12 @@ importers: .: dependencies: + '@huggingface/inference': + specifier: ^3.5.0 + version: 3.5.0 '@huggingface/tasks': - specifier: ^0.14.0 - version: 0.14.0 + specifier: ^0.17.0 + version: 0.17.0 '@types/node': specifier: ^22.5.0 version: 22.5.0 @@ -186,8 +189,12 @@ packages: cpu: [x64] os: [win32] - '@huggingface/tasks@0.14.0': - resolution: {integrity: sha512-N7Zb5dX2Pl/fGBZ3dOCtjdvTr1CmVQ0ZRE/6VLqXsPeSYI+aoOUnICDya6lvo8INJYGCvjrgTM3keBeed/mfjg==} + '@huggingface/inference@3.5.0': + resolution: {integrity: sha512-5IKkI/HJDDWg5aVWyd60kj27L9Kwxyyvu64U1To4/HzsZj13flqv2rJMrT6OB0izvFwTfUN1SDrrA5OH3YbxQQ==} + engines: {node: '>=18'} + + '@huggingface/tasks@0.17.0': + resolution: {integrity: sha512-tRF2gBBgt71VGTZa5Per7HzDWkQ+gzR6Ay2X2i0Cf9FJTMoHBIK0JJQ3W4wL1g4ebdDWiDppganqz8Upy3Hy4A==} '@jridgewell/resolve-uri@3.1.2': resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==} @@ -404,7 +411,11 @@ snapshots: '@esbuild/win32-x64@0.23.1': optional: true - '@huggingface/tasks@0.14.0': {} + '@huggingface/inference@3.5.0': + dependencies: + '@huggingface/tasks': 0.17.0 + + '@huggingface/tasks@0.17.0': {} '@jridgewell/resolve-uri@3.1.2': {} diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index cae879700..98254a84e 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -1,4 +1,5 @@ -import { PipelineType, snippets } from "@huggingface/tasks"; +import { snippets } from "@huggingface/inference"; +import { PipelineType, InferenceSnippet } from "@huggingface/tasks"; import Handlebars from "handlebars"; import * as fs from "node:fs/promises"; import * as path from "node:path/posix"; @@ -78,11 +79,13 @@ function readTemplate( function writeTaskDoc(templateName: string, content: string): Promise { const taskDocPath = path.join(TASKS_DOCS_DIR, `${templateName}.md`); console.log(` 💾 Saving to ${taskDocPath}`); - const header = PAGE_HEADER({task:templateName}); + const header = PAGE_HEADER({ task: templateName }); 
   const contentWithHeader = `\n\n${content}`;
   return fs
     .mkdir(TASKS_DOCS_DIR, { recursive: true })
-    .then(() => fs.writeFile(taskDocPath, contentWithHeader, { encoding: "utf-8" }));
+    .then(() =>
+      fs.writeFile(taskDocPath, contentWithHeader, { encoding: "utf-8" }),
+    );
 }
 
 /////////////////////////
@@ -99,44 +102,53 @@ const TASKS_DATA = (await response.json()) as any;
 //// Snippet utils ////
 ///////////////////////
 
-const formatSnippets = (result: snippets.types.InferenceSnippet | snippets.types.InferenceSnippet[], defaultClient: string, language: string): string => {
+const formatSnippets = (
+  result: InferenceSnippet | InferenceSnippet[],
+  defaultClient: string,
+  language: string,
+): string => {
   // For single snippet, just wrap with code block
   if (!Array.isArray(result) || result.length === 1) {
     const snippet = Array.isArray(result) ? result[0] : result;
     return `\`\`\`${language}\n${snippet.content}\n\`\`\``;
   }
-  
+
   // For multiple snippets, add description and wrap each one
   return result
-    .map(snippet => {
+    .map((snippet) => {
       const client = snippet.client || defaultClient;
       return `Using \`${client}\`:\n\`\`\`${language}\n${snippet.content}\n\`\`\``;
     })
-    .join('\n\n');
+    .join("\n\n");
 };
 
-
 const GET_SNIPPET_FN = {
   curl: (modelData: any, token: string) => {
-    const result = snippets.curl.getCurlInferenceSnippet(modelData, token);
-    return formatSnippets(result, 'curl', 'bash');
+    const result = snippets.curl.getCurlInferenceSnippet(
+      modelData,
+      token,
+      "hf-inference",
+    );
+    return formatSnippets(result, "curl", "bash");
   },
   js: (modelData: any, token: string) => {
-    const result = snippets.js.getJsInferenceSnippet(modelData, token);
-    return formatSnippets(result, 'javascript', 'js');
+    const result = snippets.js.getJsInferenceSnippet(
+      modelData,
+      token,
+      "hf-inference",
+    );
+    return formatSnippets(result, "javascript", "js");
  },
   python: (modelData: any, token: string) => {
-    const result = snippets.python.getPythonInferenceSnippet(modelData, token);
-    return formatSnippets(result, 'python', 'py');
+    const result = snippets.python.getPythonInferenceSnippet(
+      modelData,
+      token,
+      "hf-inference",
+    );
+    return formatSnippets(result, "python", "py");
   },
 } as const;
 
-const HAS_SNIPPET_FN = {
-  curl: snippets.curl.hasCurlInferenceSnippet,
-  js: snippets.js.hasJsInferenceSnippet,
-  python: snippets.python.hasPythonInferenceSnippet,
-} as const;
-
 export function getInferenceSnippet(
   id: string,
   pipeline_tag: PipelineType,
@@ -153,9 +165,9 @@ export function getInferenceSnippet(
     tags: tags ?? [],
   };
   // @ts-ignore
-  if (HAS_SNIPPET_FN[language](modelData)) {
-    // @ts-ignore
-    return GET_SNIPPET_FN[language](modelData, "hf_***");
+  const generatedSnippets = GET_SNIPPET_FN[language](modelData, "hf_***");
+  if (generatedSnippets) {
+    return generatedSnippets;
   }
 }
 
@@ -445,7 +457,6 @@ TASKS.forEach((task) => {
   });
 });
-
 
 // Render specs
 await Promise.all(
   TASKS_EXTENDED.map(async (task) => {
@@ -483,16 +494,16 @@ function fetchChatCompletion() {
     {
       name: "chat-completion",
       baseName: "text-generation",
-      pipelineTag: "text-generation"
+      pipelineTag: "text-generation",
     },
     {
       name: "conversational-image-text-to-text",
       baseName: "image-text-to-text",
-      pipelineTag: "image-text-to-text"
-    }
+      pipelineTag: "image-text-to-text",
+    },
   ];
 
-  conversationalTasks.forEach(task => {
+  conversationalTasks.forEach((task) => {
     // Recommended models based on the base task
     DATA.models[task.name] = DATA.models[task.baseName].filter(
       // @ts-ignore
@@ -503,18 +514,35 @@ function fetchChatCompletion() {
     const taskSnippets = {
       // @ts-ignore
-      curl: getInferenceSnippet(mainModel.id, task.pipelineTag, "curl", mainModel.config, ["conversational"]),
+      curl: getInferenceSnippet(
+        mainModel.id,
+        task.pipelineTag,
+        "curl",
+        mainModel.config,
+        ["conversational"],
+      ),
       // @ts-ignore
-      python: getInferenceSnippet(mainModel.id, task.pipelineTag, "python", mainModel.config, ["conversational"]),
+      python: getInferenceSnippet(
+        mainModel.id,
+        task.pipelineTag,
+        "python",
+        mainModel.config,
+        ["conversational"],
+      ),
       // @ts-ignore
-      javascript: getInferenceSnippet(mainModel.id, task.pipelineTag, "js", mainModel.config, ["conversational"]),
+      javascript: getInferenceSnippet(
+        mainModel.id,
+        task.pipelineTag,
+        "js",
+        mainModel.config,
+        ["conversational"],
+      ),
     };
     DATA.snippets[task.name] = SNIPPETS_TEMPLATE({
       taskSnippets,
       taskSnakeCase: baseName.replaceAll("-", "_"),
       taskAttached: baseName.replaceAll("-", ""),
     });
-
   });
 }
 
@@ -541,4 +569,4 @@ await Promise.all(
   }),
 );
-console.log("✅ All done!");
\ No newline at end of file
+console.log("✅ All done!");
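
The core behavioural change in `generate.ts` above is that `getInferenceSnippet` now always calls the per-client snippet generators and lets `formatSnippets` decide how to render the result: a single `InferenceSnippet` becomes one plain fenced block, while multiple snippets each get a ``Using `<client>`:`` label. The sketch below is a minimal, self-contained illustration of that formatting logic and is not part of the PR; the simplified `InferenceSnippet` shape and the sample snippet contents are assumptions for demonstration only.

```ts
// Minimal sketch of the snippet-formatting behaviour introduced in generate.ts.
// `InferenceSnippet` is reduced to the two fields the formatter actually uses;
// the sample snippet contents below are made up for illustration.
interface InferenceSnippet {
  client?: string;
  content: string;
}

const formatSnippets = (
  result: InferenceSnippet | InferenceSnippet[],
  defaultClient: string,
  language: string,
): string => {
  // A single snippet is just wrapped in one fenced code block.
  if (!Array.isArray(result) || result.length === 1) {
    const snippet = Array.isArray(result) ? result[0] : result;
    return `\`\`\`${language}\n${snippet.content}\n\`\`\``;
  }

  // Multiple snippets each get a "Using `<client>`:" label before their block.
  return result
    .map((snippet) => {
      const client = snippet.client || defaultClient;
      return `Using \`${client}\`:\n\`\`\`${language}\n${snippet.content}\n\`\`\``;
    })
    .join("\n\n");
};

// One snippet -> one anonymous code block.
console.log(formatSnippets({ content: "curl https://example.test" }, "curl", "bash"));

// Two snippets -> two labelled blocks.
console.log(
  formatSnippets(
    [
      { client: "huggingface.js", content: "const client = new HfInference(token);" },
      { client: "fetch", content: 'await fetch(url, { method: "POST" });' },
    ],
    "javascript",
    "js",
  ),
);
```

The second call prints two labelled blocks, which is exactly the shape of the regenerated task pages above (for example the new ``Using `huggingface_hub`:`` and ``Using `requests`:`` sections in translation.md); the file name and the way you run the sketch (e.g. with `tsx`) are left to the reader, as they are not defined by the PR.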