From d5396972127cc0da3531bda31d23a4aada932f20 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Thu, 20 Mar 2025 17:47:22 +0100 Subject: [PATCH 1/6] more advanced --- .../tasks/audio-classification.md | 69 ++- .../tasks/automatic-speech-recognition.md | 109 ++-- docs/api-inference/tasks/chat-completion.md | 569 ++++++++++-------- .../api-inference/tasks/feature-extraction.md | 112 ++-- docs/api-inference/tasks/fill-mask.md | 110 ++-- .../tasks/image-classification.md | 90 ++- .../api-inference/tasks/image-segmentation.md | 69 ++- .../api-inference/tasks/image-text-to-text.md | 154 +++-- docs/api-inference/tasks/image-to-image.md | 57 +- docs/api-inference/tasks/object-detection.md | 69 ++- .../api-inference/tasks/question-answering.md | 118 ++-- docs/api-inference/tasks/summarization.md | 110 ++-- .../tasks/table-question-answering.md | 134 +++-- .../tasks/text-classification.md | 110 ++-- docs/api-inference/tasks/text-generation.md | 110 ++-- docs/api-inference/tasks/text-to-image.md | 91 ++- .../tasks/token-classification.md | 110 ++-- docs/api-inference/tasks/translation.md | 110 ++-- .../tasks/zero-shot-classification.md | 82 +-- scripts/api-inference/package.json | 4 +- scripts/api-inference/pnpm-lock.yaml | 34 +- scripts/api-inference/scripts/generate.ts | 181 ++---- .../common/snippets-template.handlebars | 34 +- 23 files changed, 1442 insertions(+), 1194 deletions(-) diff --git a/docs/api-inference/tasks/audio-classification.md b/docs/api-inference/tasks/audio-classification.md index 47144b754..0975ca21e 100644 --- a/docs/api-inference/tasks/audio-classification.md +++ b/docs/api-inference/tasks/audio-classification.md @@ -40,60 +40,79 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector \ - -X POST \ - --data-binary '@sample1.flac' \ - -H 'Authorization: Bearer hf_***' + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***", +) + +output = client.audio_classification("sample1.flac", model="speechbrain/google_speech_command_xvector") ``` - - -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector" headers = {"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers={"Content-Type": "audio/flac", **headers}, data=data) + return response.json() output = query("sample1.flac") ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.audio_classification). 
- + - + + ```js -async function query(filename) { - const data = fs.readFileSync(filename); +async function query(data) { const response = await fetch( "https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector", { headers: { Authorization: "Bearer hf_***", - "Content-Type": "application/json", + "Content-Type": "audio/flac" }, method: "POST", - body: data, + body: JSON.stringify(data), } ); const result = await response.json(); return result; } -query("sample1.flac").then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "sample1.flac" }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#audioclassification). - + + + + +```sh +curl https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: audio/flac' \ + --data-binary @"sample1.flac" +``` + + + diff --git a/docs/api-inference/tasks/automatic-speech-recognition.md b/docs/api-inference/tasks/automatic-speech-recognition.md index c28a10f14..bdac69319 100644 --- a/docs/api-inference/tasks/automatic-speech-recognition.md +++ b/docs/api-inference/tasks/automatic-speech-recognition.md @@ -39,79 +39,100 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3 \ - -X POST \ - --data-binary '@sample1.flac' \ - -H 'Authorization: Bearer hf_***' + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***", +) + +output = client.automatic_speech_recognition("sample1.flac", model="openai/whisper-large-v3") ``` - - -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3" headers = {"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers={"Content-Type": "audio/flac", **headers}, data=data) + return response.json() output = query("sample1.flac") ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.automatic_speech_recognition). 
- - - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const data = fs.readFileSync("sample1.flac"); - -const output = await client.automaticSpeechRecognition({ - data, - model: "openai/whisper-large-v3", - provider: "hf-inference", -}); + -console.log(output); - -``` - -Using `fetch`: + + ```js -async function query(filename) { - const data = fs.readFileSync(filename); +async function query(data) { const response = await fetch( "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3", { headers: { Authorization: "Bearer hf_***", - "Content-Type": "application/json", + "Content-Type": "audio/flac" }, method: "POST", - body: data, + body: JSON.stringify(data), } ); const result = await response.json(); return result; } -query("sample1.flac").then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "sample1.flac" }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#automaticspeechrecognition). - + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const data = fs.readFileSync("sample1.flac"); + +const output = await client.automaticSpeechRecognition({ + data, + model: "openai/whisper-large-v3", + provider: "hf-inference", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```sh +curl https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3 \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: audio/flac' \ + --data-binary @"sample1.flac" +``` + + + diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md index e4aac572d..ca0b07e33 100644 --- a/docs/api-inference/tasks/chat-completion.md +++ b/docs/api-inference/tasks/chat-completion.md @@ -60,147 +60,159 @@ The API supports: - -```bash -curl 'https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1/chat/completions' \ --H 'Authorization: Bearer hf_***' \ --H 'Content-Type: application/json' \ ---data '{ - "model": "google/gemma-2-2b-it", - "messages": [ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - "max_tokens": 500, - "stream": true -}' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) -messages = [ - { - "role": "user", - "content": "What is the capital of France?" - } -] - -stream = client.chat.completions.create( - model="google/gemma-2-2b-it", - messages=messages, - max_tokens=500, - stream=True +completion = client.chat.completions.create( + model="google/gemma-2-2b-it", + messages=[ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + max_tokens=500, ) -for chunk in stream: - print(chunk.choices[0].delta.content, end="") +print(completion.choices[0].message) ``` -Using `openai`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ + +```python +import requests + +API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1/chat/completions" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +response = query({ + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_tokens": 500, + "model": "google/gemma-2-2b-it" +}) + +print(response["choices"][0]["message"]) +``` + + + + + +```python from openai import OpenAI client = OpenAI( - base_url="https://router.huggingface.co/hf-inference/v1", - api_key="hf_***" + base_url="https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1", + api_key="hf_***" ) -messages = [ - { - "role": "user", - "content": "What is the capital of France?" - } -] - -stream = client.chat.completions.create( - model="google/gemma-2-2b-it", - messages=messages, - max_tokens=500, - stream=True +completion = client.chat.completions.create( + model="google/gemma-2-2b-it", + messages=[ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + max_tokens=500, ) -for chunk in stream: - print(chunk.choices[0].delta.content, end="") +print(completion.choices[0].message) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). - + - -Using `huggingface.js`: + + ```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -let out = ""; - -const stream = client.chatCompletionStream({ - model: "google/gemma-2-2b-it", - messages: [ - { - role: "user", - content: "What is the capital of France?" - } - ], - provider: "hf-inference", - max_tokens: 500, +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const chatCompletion = await client.chatCompletion({ + provider: "hf-inference", + model: "google/gemma-2-2b-it", + messages: [ + { + role: "user", + content: "What is the capital of France?", + }, + ], + max_tokens: 500, }); -for await (const chunk of stream) { - if (chunk.choices && chunk.choices.length > 0) { - const newContent = chunk.choices[0].delta.content; - out += newContent; - console.log(newContent); - } -} +console.log(chatCompletion.choices[0].message); ``` -Using `openai`: + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + ```js import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://router.huggingface.co/hf-inference/v1", - apiKey: "hf_***" + baseURL: "https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1", + apiKey: "hf_***", }); -let out = ""; - -const stream = await client.chat.completions.create({ +const chatCompletion = await client.chat.completions.create({ model: "google/gemma-2-2b-it", - messages: [ - { - role: "user", - content: "What is the capital of France?" 
- } - ], - max_tokens: 500, - stream: true, + messages: [ + { + role: "user", + content: "What is the capital of France?", + }, + ], + max_tokens: 500, }); -for await (const chunk of stream) { - if (chunk.choices && chunk.choices.length > 0) { - const newContent = chunk.choices[0].delta.content; - out += newContent; - console.log(newContent); - } -} +console.log(chatCompletion.choices[0].message); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion). - + + + + +```sh +curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1/chat/completions \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_tokens": 500, + "model": "google/gemma-2-2b-it", + "stream": false + }' +``` + + + @@ -211,202 +223,225 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ - -```bash -curl 'https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions' \ --H 'Authorization: Bearer hf_***' \ --H 'Content-Type: application/json' \ ---data '{ - "model": "Qwen/Qwen2.5-VL-7B-Instruct", - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." - }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - "max_tokens": 500, - "stream": true -}' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) -messages = [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." - }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } -] - -stream = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", - messages=messages, - max_tokens=500, - stream=True +completion = client.chat.completions.create( + model="Qwen/Qwen2.5-VL-7B-Instruct", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens=500, ) -for chunk in stream: - print(chunk.choices[0].delta.content, end="") +print(completion.choices[0].message) ``` -Using `openai`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python +import requests + +API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +response = query({ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." 
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + "max_tokens": 500, + "model": "Qwen/Qwen2.5-VL-7B-Instruct" +}) + +print(response["choices"][0]["message"]) +``` + + + + + +```python from openai import OpenAI client = OpenAI( - base_url="https://router.huggingface.co/hf-inference/v1", - api_key="hf_***" + base_url="https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1", + api_key="hf_***" ) -messages = [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." - }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } -] - -stream = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", - messages=messages, - max_tokens=500, - stream=True +completion = client.chat.completions.create( + model="Qwen/Qwen2.5-VL-7B-Instruct", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens=500, ) -for chunk in stream: - print(chunk.choices[0].delta.content, end="") +print(completion.choices[0].message) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). - + - -Using `huggingface.js`: + + ```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -let out = ""; - -const stream = client.chatCompletionStream({ - model: "Qwen/Qwen2.5-VL-7B-Instruct", - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: "Describe this image in one sentence." - }, - { - type: "image_url", - image_url: { - url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - provider: "hf-inference", - max_tokens: 500, +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const chatCompletion = await client.chatCompletion({ + provider: "hf-inference", + model: "Qwen/Qwen2.5-VL-7B-Instruct", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence.", + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", + }, + }, + ], + }, + ], + max_tokens: 500, }); -for await (const chunk of stream) { - if (chunk.choices && chunk.choices.length > 0) { - const newContent = chunk.choices[0].delta.content; - out += newContent; - console.log(newContent); - } -} +console.log(chatCompletion.choices[0].message); ``` -Using `openai`: + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
+ + ```js import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://router.huggingface.co/hf-inference/v1", - apiKey: "hf_***" + baseURL: "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1", + apiKey: "hf_***", }); -let out = ""; - -const stream = await client.chat.completions.create({ +const chatCompletion = await client.chat.completions.create({ model: "Qwen/Qwen2.5-VL-7B-Instruct", - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: "Describe this image in one sentence." - }, - { - type: "image_url", - image_url: { - url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - max_tokens: 500, - stream: true, + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence.", + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", + }, + }, + ], + }, + ], + max_tokens: 500, }); -for await (const chunk of stream) { - if (chunk.choices && chunk.choices.length > 0) { - const newContent = chunk.choices[0].delta.content; - out += newContent; - console.log(newContent); - } -} +console.log(chatCompletion.choices[0].message); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion). - + + + + +```sh +curl https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + "max_tokens": 500, + "model": "Qwen/Qwen2.5-VL-7B-Instruct", + "stream": false + }' +``` + + + diff --git a/docs/api-inference/tasks/feature-extraction.md b/docs/api-inference/tasks/feature-extraction.md index fc707ad85..f86638120 100644 --- a/docs/api-inference/tasks/feature-extraction.md +++ b/docs/api-inference/tasks/feature-extraction.md @@ -38,77 +38,50 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/thenlper/gte-large \ - -X POST \ - -d '{"inputs": "Today is a sunny day and I will get some ice cream."}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) result = client.feature_extraction( - model="thenlper/gte-large", - inputs="Today is a sunny day and I will get some ice cream.", - provider="hf-inference", + inputs="Today is a sunny day and I will get some ice cream.", + model="thenlper/gte-large", ) - -print(result) - ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/thenlper/gte-large" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + output = query({ - "inputs": "Today is a sunny day and I will get some ice cream.", + "inputs": "Today is a sunny day and I will get some ice cream.", }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.feature_extraction). - + - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const output = await client.featureExtraction({ - model: "thenlper/gte-large", - inputs: "Today is a sunny day and I will get some ice cream.", - provider: "hf-inference", -}); - -console.log(output); - -``` - -Using `fetch`: + + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/thenlper/gte-large", + "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/thenlper/gte-large", { headers: { Authorization: "Bearer hf_***", @@ -122,13 +95,46 @@ async function query(data) { return result; } -query({"inputs": "Today is a sunny day and I will get some ice cream."}).then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "Today is a sunny day and I will get some ice cream." }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#featureextraction). - + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.featureExtraction({ + model: "thenlper/gte-large", + inputs: "Today is a sunny day and I will get some ice cream.", + provider: "hf-inference", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
+ + +```sh +curl https://router.huggingface.co/hf-inference/pipeline/feature-extraction/thenlper/gte-large \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"Today is a sunny day and I will get some ice cream.\"" + }' +``` + + + diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md index 70e7c256c..dd59cc641 100644 --- a/docs/api-inference/tasks/fill-mask.md +++ b/docs/api-inference/tasks/fill-mask.md @@ -33,73 +33,46 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base \ - -X POST \ - -d '{"inputs": "The answer to the universe is [MASK]."}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) result = client.fill_mask( - model="FacebookAI/xlm-roberta-base", - inputs="The answer to the universe is [MASK].", - provider="hf-inference", + inputs="The answer to the universe is [MASK].", + model="FacebookAI/xlm-roberta-base", ) - -print(result) - ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + output = query({ - "inputs": "The answer to the universe is [MASK].", + "inputs": "The answer to the universe is [MASK].", }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.fill_mask). - - - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const output = await client.fillMask({ - model: "FacebookAI/xlm-roberta-base", - inputs: "The answer to the universe is [MASK].", - provider: "hf-inference", -}); - -console.log(output); - -``` + -Using `fetch`: + + ```js async function query(data) { const response = await fetch( @@ -117,13 +90,46 @@ async function query(data) { return result; } -query({"inputs": "The answer to the universe is [MASK]."}).then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "The answer to the universe is [MASK]." }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#fillmask). 
- + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.fillMask({ + model: "FacebookAI/xlm-roberta-base", + inputs: "The answer to the universe is [MASK].", + provider: "hf-inference", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```sh +curl https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"The answer to the universe is [MASK].\"" + }' +``` + + + diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md index 0a0eb78c8..457d01bb4 100644 --- a/docs/api-inference/tasks/image-classification.md +++ b/docs/api-inference/tasks/image-classification.md @@ -35,60 +35,100 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224 \ - -X POST \ - --data-binary '@cats.jpg' \ - -H 'Authorization: Bearer hf_***' + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***", +) + +output = client.image_classification("cats.jpg", model="google/vit-base-patch16-224") ``` - - -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224" headers = {"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers={"Content-Type": "image/jpeg", **headers}, data=data) + return response.json() output = query("cats.jpg") ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_classification). 
- + - + + ```js -async function query(filename) { - const data = fs.readFileSync(filename); +async function query(data) { const response = await fetch( "https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224", { headers: { Authorization: "Bearer hf_***", - "Content-Type": "application/json", + "Content-Type": "image/jpeg" }, method: "POST", - body: data, + body: JSON.stringify(data), } ); const result = await response.json(); return result; } -query("cats.jpg").then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "cats.jpg" }).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const data = fs.readFileSync("cats.jpg"); + +const output = await client.imageClassification({ + data, + model: "google/vit-base-patch16-224", + provider: "hf-inference", }); + +console.log(output); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#imageclassification). - + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```sh +curl https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224 \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: image/jpeg' \ + --data-binary @"cats.jpg" +``` + + + diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md index 459b5037d..8d3f0117e 100644 --- a/docs/api-inference/tasks/image-segmentation.md +++ b/docs/api-inference/tasks/image-segmentation.md @@ -34,60 +34,79 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small \ - -X POST \ - --data-binary '@cats.jpg' \ - -H 'Authorization: Bearer hf_***' + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***", +) + +output = client.image_segmentation("cats.jpg", model="openmmlab/upernet-convnext-small") ``` - - -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small" headers = {"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers={"Content-Type": "image/jpeg", **headers}, data=data) + return response.json() output = query("cats.jpg") ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_segmentation). 
- + - + + ```js -async function query(filename) { - const data = fs.readFileSync(filename); +async function query(data) { const response = await fetch( "https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small", { headers: { Authorization: "Bearer hf_***", - "Content-Type": "application/json", + "Content-Type": "image/jpeg" }, method: "POST", - body: data, + body: JSON.stringify(data), } ); const result = await response.json(); return result; } -query("cats.jpg").then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "cats.jpg" }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#imagesegmentation). - + + + + +```sh +curl https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: image/jpeg' \ + --data-binary @"cats.jpg" +``` + + + diff --git a/docs/api-inference/tasks/image-text-to-text.md b/docs/api-inference/tasks/image-text-to-text.md index 14903fb01..bf1c401a1 100644 --- a/docs/api-inference/tasks/image-text-to-text.md +++ b/docs/api-inference/tasks/image-text-to-text.md @@ -33,89 +33,123 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct \ - -X POST \ - -d '{"inputs": "Can you please let us know more details about your "}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) -messages = "\"Can you please let us know more details about your \"" - -stream = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", - messages=messages, - max_tokens=500, - stream=True +completion = client.chat.completions.create( + model="Qwen/Qwen2.5-VL-7B-Instruct", + inputs="\"Can you please let us know more details about your \"", ) -for chunk in stream: - print(chunk.choices[0].delta.content, end="") +print(completion.choices[0].message) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ + +```python +import requests + +API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +response = query({ + "inputs": "\"Can you please let us know more details about your \"", + "model": "Qwen/Qwen2.5-VL-7B-Instruct" +}) + +print(response["choices"][0]["message"]) ``` -Using `openai`: -```py + + + + +```python from openai import OpenAI client = OpenAI( - base_url="https://router.huggingface.co/hf-inference/v1", - api_key="hf_***" + base_url="https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1", + api_key="hf_***" ) -messages = "\"Can you please let us know more details about your \"" - -stream = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", - messages=messages, - max_tokens=500, - stream=True +completion = client.chat.completions.create( + model="Qwen/Qwen2.5-VL-7B-Instruct", + inputs="\"Can you please let us know more details about your \"", ) -for chunk in stream: - print(chunk.choices[0].delta.content, end="") +print(completion.choices[0].message) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_text_to_text). - + - + + ```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({"inputs": "Can you please let us know more details about your "}).then((response) => { - console.log(JSON.stringify(response)); +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const chatCompletion = await client.chatCompletion({ + provider: "hf-inference", + model: "Qwen/Qwen2.5-VL-7B-Instruct", + inputs: "\"Can you please let us know more details about your \"", }); + +console.log(chatCompletion.choices[0].message); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#imagetexttotext). - + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
+ + +```js +import { OpenAI } from "openai"; + +const client = new OpenAI({ + baseURL: "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1", + apiKey: "hf_***", +}); + +const chatCompletion = await client.chat.completions.create({ + model: "Qwen/Qwen2.5-VL-7B-Instruct", + inputs: "\"Can you please let us know more details about your \"", +}); + +console.log(chatCompletion.choices[0].message); +``` + + + + + +```sh +curl https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"Can you please let us know more details about your \"", + "model": "Qwen/Qwen2.5-VL-7B-Instruct", + "stream": false + }' +``` + + + diff --git a/docs/api-inference/tasks/image-to-image.md b/docs/api-inference/tasks/image-to-image.md index 810b97e57..845731ba8 100644 --- a/docs/api-inference/tasks/image-to-image.md +++ b/docs/api-inference/tasks/image-to-image.md @@ -35,7 +35,62 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API -No snippet available for this task. + + + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***", +) + +# output is a PIL.Image object +image = client.image_to_image( + "cat.png", + prompt="Turn the cat into a tiger.", + model="", +) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python +import base64 +import requests + +API_URL = "https://router.huggingface.co/hf-inference/models/" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + with open(payload["inputs"], "rb") as f: + img = f.read() + payload["inputs"] = base64.b64encode(img).decode("utf-8") + response = requests.post(API_URL, headers=headers, json=payload) + return response.content + +image_bytes = query({ + "inputs": "cat.png", + "parameters": { + "prompt": "Turn the cat into a tiger." + } +}) + +# You can access the image with PIL.Image for example +import io +from PIL import Image +image = Image.open(io.BytesIO(image_bytes)) +``` + + + + + diff --git a/docs/api-inference/tasks/object-detection.md b/docs/api-inference/tasks/object-detection.md index b8fde8d08..544d9a6af 100644 --- a/docs/api-inference/tasks/object-detection.md +++ b/docs/api-inference/tasks/object-detection.md @@ -33,60 +33,79 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50 \ - -X POST \ - --data-binary '@cats.jpg' \ - -H 'Authorization: Bearer hf_***' + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***", +) + +output = client.object_detection("cats.jpg", model="facebook/detr-resnet-50") ``` - - -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50" headers = {"Authorization": "Bearer hf_***"} def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers=headers, data=data) - return response.json() + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers={"Content-Type": "image/jpeg", **headers}, data=data) + return response.json() output = query("cats.jpg") ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.object_detection). - + - + + ```js -async function query(filename) { - const data = fs.readFileSync(filename); +async function query(data) { const response = await fetch( "https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50", { headers: { Authorization: "Bearer hf_***", - "Content-Type": "application/json", + "Content-Type": "image/jpeg" }, method: "POST", - body: data, + body: JSON.stringify(data), } ); const result = await response.json(); return result; } -query("cats.jpg").then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "cats.jpg" }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#objectdetection). - + + + + +```sh +curl https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50 \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: image/jpeg' \ + --data-binary @"cats.jpg" +``` + + + diff --git a/docs/api-inference/tasks/question-answering.md b/docs/api-inference/tasks/question-answering.md index 0cca700b3..56ccf9033 100644 --- a/docs/api-inference/tasks/question-answering.md +++ b/docs/api-inference/tasks/question-answering.md @@ -35,82 +35,52 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2 \ - -X POST \ - -d '{"inputs": { "question": "What is my name?", "context": "My name is Clara and I live in Berkeley." }}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) result = client.question_answering( - model="deepset/roberta-base-squad2", - inputs={ + inputs={ "question": "What is my name?", "context": "My name is Clara and I live in Berkeley." }, - provider="hf-inference", + model="deepset/roberta-base-squad2", ) - -print(result) - ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + output = query({ - "inputs": { + "inputs": { "question": "What is my name?", "context": "My name is Clara and I live in Berkeley." }, }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.question_answering). - - - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const output = await client.questionAnswering({ - model: "deepset/roberta-base-squad2", - inputs: { - "question": "What is my name?", - "context": "My name is Clara and I live in Berkeley." -}, - provider: "hf-inference", -}); - -console.log(output); - -``` + -Using `fetch`: + + ```js async function query(data) { const response = await fetch( @@ -128,16 +98,52 @@ async function query(data) { return result; } -query({"inputs": { +query({ inputs: { + "question": "What is my name?", + "context": "My name is Clara and I live in Berkeley." +} }).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.questionAnswering({ + model: "deepset/roberta-base-squad2", + inputs: { "question": "What is my name?", "context": "My name is Clara and I live in Berkeley." -}}).then((response) => { - console.log(JSON.stringify(response)); +}, + provider: "hf-inference", }); + +console.log(output); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#questionanswering). - + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```sh +curl https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2 \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "{\n\t\"question\": \"What is my name?\",\n\t\"context\": \"My name is Clara and I live in Berkeley.\"\n}" + }' +``` + + + diff --git a/docs/api-inference/tasks/summarization.md b/docs/api-inference/tasks/summarization.md index b55dfac70..daf273ea4 100644 --- a/docs/api-inference/tasks/summarization.md +++ b/docs/api-inference/tasks/summarization.md @@ -34,73 +34,46 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn \ - -X POST \ - -d '{"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. 
It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) result = client.summarization( - model="facebook/bart-large-cnn", - inputs="The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", - provider="hf-inference", + inputs="The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", + model="facebook/bart-large-cnn", ) - -print(result) - ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + output = query({ - "inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. 
Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", + "inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.summarization). - - - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const output = await client.summarization({ - model: "facebook/bart-large-cnn", - inputs: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", - provider: "hf-inference", -}); - -console.log(output); - -``` + -Using `fetch`: + + ```js async function query(data) { const response = await fetch( @@ -118,13 +91,46 @@ async function query(data) { return result; } -query({"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."}).then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. 
During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct." }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#summarization). - + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.summarization({ + model: "facebook/bart-large-cnn", + inputs: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", + provider: "hf-inference", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```sh +curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). 
Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.\"" + }' +``` + + + diff --git a/docs/api-inference/tasks/table-question-answering.md b/docs/api-inference/tasks/table-question-answering.md index 8004ab9b0..8e2357db2 100644 --- a/docs/api-inference/tasks/table-question-answering.md +++ b/docs/api-inference/tasks/table-question-answering.md @@ -34,29 +34,18 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/microsoft/tapex-base \ - -X POST \ - -d '{"inputs": { "query": "How many stars does the transformers repository have?", "table": { "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": [ "Python", "Python", "Rust, Python and NodeJS" ] } }}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) result = client.table_question_answering( - model="microsoft/tapex-base", - inputs={ + inputs={ "query": "How many stars does the transformers repository have?", "table": { "Repository": ["Transformers", "Datasets", "Tokenizers"], @@ -69,26 +58,27 @@ result = client.table_question_answering( ] } }, - provider="hf-inference", + model="microsoft/tapex-base", ) - -print(result) - ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/microsoft/tapex-base" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + output = query({ - "inputs": { + "inputs": { "query": "How many stars does the transformers repository have?", "table": { "Repository": ["Transformers", "Datasets", "Tokenizers"], @@ -104,19 +94,28 @@ output = query({ }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.table_question_answering). 
- + - -Using `huggingface.js`: + + ```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); +async function query(data) { + const response = await fetch( + "https://router.huggingface.co/hf-inference/models/microsoft/tapex-base", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} -const output = await client.tableQuestionAnswering({ - model: "microsoft/tapex-base", - inputs: { +query({ inputs: { "query": "How many stars does the transformers repository have?", "table": { "Repository": ["Transformers", "Datasets", "Tokenizers"], @@ -128,33 +127,23 @@ const output = await client.tableQuestionAnswering({ "Rust, Python and NodeJS" ] } -}, - provider: "hf-inference", +} }).then((response) => { + console.log(JSON.stringify(response)); }); - -console.log(output); - ``` -Using `fetch`: + + + + ```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/microsoft/tapex-base", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} +import { InferenceClient } from "@huggingface/inference"; -query({"inputs": { +const client = new InferenceClient("hf_***"); + +const output = await client.tableQuestionAnswering({ + model: "microsoft/tapex-base", + inputs: { "query": "How many stars does the transformers repository have?", "table": { "Repository": ["Transformers", "Datasets", "Tokenizers"], @@ -166,13 +155,30 @@ query({"inputs": { "Rust, Python and NodeJS" ] } -}}).then((response) => { - console.log(JSON.stringify(response)); +}, + provider: "hf-inference", }); + +console.log(output); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#tablequestionanswering). - + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```sh +curl https://router.huggingface.co/hf-inference/models/microsoft/tapex-base \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "{\n\t\"query\": \"How many stars does the transformers repository have?\",\n\t\"table\": {\n\t\t\"Repository\": [\"Transformers\", \"Datasets\", \"Tokenizers\"],\n\t\t\"Stars\": [\"36542\", \"4512\", \"3934\"],\n\t\t\"Contributors\": [\"651\", \"77\", \"34\"],\n\t\t\"Programming language\": [\n\t\t\t\"Python\",\n\t\t\t\"Python\",\n\t\t\t\"Rust, Python and NodeJS\"\n\t\t]\n\t}\n}" + }' +``` + + + diff --git a/docs/api-inference/tasks/text-classification.md b/docs/api-inference/tasks/text-classification.md index ed2458c0c..befb504ad 100644 --- a/docs/api-inference/tasks/text-classification.md +++ b/docs/api-inference/tasks/text-classification.md @@ -37,73 +37,46 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english \ - -X POST \ - -d '{"inputs": "I like you. 
I love you"}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) result = client.text_classification( - model="distilbert/distilbert-base-uncased-finetuned-sst-2-english", - inputs="I like you. I love you", - provider="hf-inference", + inputs="I like you. I love you", + model="distilbert/distilbert-base-uncased-finetuned-sst-2-english", ) - -print(result) - ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + output = query({ - "inputs": "I like you. I love you", + "inputs": "I like you. I love you", }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_classification). - - - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const output = await client.textClassification({ - model: "distilbert/distilbert-base-uncased-finetuned-sst-2-english", - inputs: "I like you. I love you", - provider: "hf-inference", -}); - -console.log(output); - -``` + -Using `fetch`: + + ```js async function query(data) { const response = await fetch( @@ -121,13 +94,46 @@ async function query(data) { return result; } -query({"inputs": "I like you. I love you"}).then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "I like you. I love you" }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textclassification). - + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.textClassification({ + model: "distilbert/distilbert-base-uncased-finetuned-sst-2-english", + inputs: "I like you. I love you", + provider: "hf-inference", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```sh +curl https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"I like you. 
I love you\"" + }' +``` + + + diff --git a/docs/api-inference/tasks/text-generation.md b/docs/api-inference/tasks/text-generation.md index 7cedd2c76..9df66dbed 100644 --- a/docs/api-inference/tasks/text-generation.md +++ b/docs/api-inference/tasks/text-generation.md @@ -40,73 +40,46 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it \ - -X POST \ - -d '{"inputs": "Can you please let us know more details about your "}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) result = client.text_generation( - model="google/gemma-2-2b-it", - inputs="Can you please let us know more details about your ", - provider="hf-inference", + inputs="Can you please let us know more details about your ", + model="google/gemma-2-2b-it", ) - -print(result) - ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + output = query({ - "inputs": "Can you please let us know more details about your ", + "inputs": "Can you please let us know more details about your ", }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). - - - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const output = await client.textGeneration({ - model: "google/gemma-2-2b-it", - inputs: "Can you please let us know more details about your ", - provider: "hf-inference", -}); - -console.log(output); - -``` + -Using `fetch`: + + ```js async function query(data) { const response = await fetch( @@ -124,13 +97,46 @@ async function query(data) { return result; } -query({"inputs": "Can you please let us know more details about your "}).then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "Can you please let us know more details about your " }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textgeneration). 
- + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.textGeneration({ + model: "google/gemma-2-2b-it", + inputs: "Can you please let us know more details about your ", + provider: "hf-inference", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```sh +curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"Can you please let us know more details about your \"" + }' +``` + + + diff --git a/docs/api-inference/tasks/text-to-image.md b/docs/api-inference/tasks/text-to-image.md index 71bb3b141..9f919e80e 100644 --- a/docs/api-inference/tasks/text-to-image.md +++ b/docs/api-inference/tasks/text-to-image.md @@ -35,46 +35,40 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev \ - -X POST \ - -d '{"inputs": "Astronaut riding a horse"}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) # output is a PIL.Image object image = client.text_to_image( - "Astronaut riding a horse", - model="black-forest-labs/FLUX.1-dev" + "Astronaut riding a horse", + model="black-forest-labs/FLUX.1-dev", ) ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.content + response = requests.post(API_URL, headers=headers, json=payload) + return response.content image_bytes = query({ - "inputs": "Astronaut riding a horse", + "inputs": "Astronaut riding a horse", }) # You can access the image with PIL.Image for example @@ -83,27 +77,10 @@ from PIL import Image image = Image.open(io.BytesIO(image_bytes)) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_to_image). 
- - - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const image = await client.textToImage({ - model: "black-forest-labs/FLUX.1-dev", - inputs: "Astronaut riding a horse", - parameters: { num_inference_steps: 5 }, - provider: "hf-inference", -}); -/// Use the generated image (it's a Blob) - -``` + -Using `fetch`: + + ```js async function query(data) { const response = await fetch( @@ -120,13 +97,33 @@ async function query(data) { const result = await response.blob(); return result; } -query({"inputs": "Astronaut riding a horse"}).then((response) => { - // Use image + +query({ inputs: "Astronaut riding a horse" }).then((response) => { + // Use image }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#texttoimage). - + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const image = await client.textToImage({ + provider: "hf-inference", + model: "black-forest-labs/FLUX.1-dev", + inputs: "Astronaut riding a horse", + parameters: { num_inference_steps: 5 }, +}); +/// Use the generated image (it's a Blob) +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). diff --git a/docs/api-inference/tasks/token-classification.md b/docs/api-inference/tasks/token-classification.md index eeecde32c..1a8bac740 100644 --- a/docs/api-inference/tasks/token-classification.md +++ b/docs/api-inference/tasks/token-classification.md @@ -36,73 +36,46 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER \ - -X POST \ - -d '{"inputs": "My name is Sarah Jessica Parker but you can call me Jessica"}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) result = client.token_classification( - model="dslim/bert-base-NER", - inputs="My name is Sarah Jessica Parker but you can call me Jessica", - provider="hf-inference", + inputs="My name is Sarah Jessica Parker but you can call me Jessica", + model="dslim/bert-base-NER", ) - -print(result) - ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + output = query({ - "inputs": "My name is Sarah Jessica Parker but you can call me Jessica", + "inputs": "My name is Sarah Jessica Parker but you can call me Jessica", }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.token_classification). - - - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const output = await client.tokenClassification({ - model: "dslim/bert-base-NER", - inputs: "My name is Sarah Jessica Parker but you can call me Jessica", - provider: "hf-inference", -}); - -console.log(output); - -``` + -Using `fetch`: + + ```js async function query(data) { const response = await fetch( @@ -120,13 +93,46 @@ async function query(data) { return result; } -query({"inputs": "My name is Sarah Jessica Parker but you can call me Jessica"}).then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "My name is Sarah Jessica Parker but you can call me Jessica" }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#tokenclassification). - + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.tokenClassification({ + model: "dslim/bert-base-NER", + inputs: "My name is Sarah Jessica Parker but you can call me Jessica", + provider: "hf-inference", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
+ + +```sh +curl https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"My name is Sarah Jessica Parker but you can call me Jessica\"" + }' +``` + + + diff --git a/docs/api-inference/tasks/translation.md b/docs/api-inference/tasks/translation.md index bc4939d7b..8db4e2845 100644 --- a/docs/api-inference/tasks/translation.md +++ b/docs/api-inference/tasks/translation.md @@ -34,73 +34,46 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B \ - -X POST \ - -d '{"inputs": "Меня зовут Вольфганг и я живу в Берлине"}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -Using `huggingface_hub`: -```py + + +```python from huggingface_hub import InferenceClient client = InferenceClient( - provider="hf-inference", - api_key="hf_***" + provider="hf-inference", + api_key="hf_***", ) result = client.translation( - model="facebook/nllb-200-1.3B", - inputs="Меня зовут Вольфганг и я живу в Берлине", - provider="hf-inference", + inputs="Меня зовут Вольфганг и я живу в Берлине", + model="facebook/nllb-200-1.3B", ) - -print(result) - ``` -Using `requests`: -```py + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + output = query({ - "inputs": "Меня зовут Вольфганг и я живу в Берлине", + "inputs": "Меня зовут Вольфганг и я живу в Берлине", }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.translation). - - - -Using `huggingface.js`: -```js -import { HfInference } from "@huggingface/inference"; - -const client = new HfInference("hf_***"); - -const output = await client.translation({ - model: "facebook/nllb-200-1.3B", - inputs: "Меня зовут Вольфганг и я живу в Берлине", - provider: "hf-inference", -}); - -console.log(output); - -``` + -Using `fetch`: + + ```js async function query(data) { const response = await fetch( @@ -118,13 +91,46 @@ async function query(data) { return result; } -query({"inputs": "Меня зовут Вольфганг и я живу в Берлине"}).then((response) => { - console.log(JSON.stringify(response)); +query({ inputs: "Меня зовут Вольфганг и я живу в Берлине" }).then((response) => { + console.log(JSON.stringify(response)); }); ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#translation). 
- + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.translation({ + model: "facebook/nllb-200-1.3B", + inputs: "Меня зовут Вольфганг и я живу в Берлине", + provider: "hf-inference", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```sh +curl https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"Меня зовут Вольфганг и я живу в Берлине\"" + }' +``` + + + diff --git a/docs/api-inference/tasks/zero-shot-classification.md b/docs/api-inference/tasks/zero-shot-classification.md index 0b0d3a1a0..b497ee503 100644 --- a/docs/api-inference/tasks/zero-shot-classification.md +++ b/docs/api-inference/tasks/zero-shot-classification.md @@ -33,26 +33,17 @@ Explore all available models and find the one that suits you best [here](https:/ - -```bash -curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli \ - -X POST \ - -d '{"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - -```py + + +```python import requests -API_URL = "https://router.huggingface.co/hf-inference/v1" +API_URL = "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli" headers = {"Authorization": "Bearer hf_***"} def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() output = query({ "inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", @@ -60,34 +51,49 @@ output = query({ }) ``` -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.zero_shot_classification). 
- + - + + ```js async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; - } - - query({"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}).then((response) => { - console.log(JSON.stringify(response)); - }); + const response = await fetch( + "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({ + inputs: "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", + parameters: { candidate_labels: ["refund", "legal", "faq"] } +}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + + + + + +```sh +curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli \ + -X POST \ + -d '{"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer hf_***' ``` -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#zeroshotclassification). 
- + + diff --git a/scripts/api-inference/package.json b/scripts/api-inference/package.json index 49cd7a61f..f23b61183 100644 --- a/scripts/api-inference/package.json +++ b/scripts/api-inference/package.json @@ -14,8 +14,8 @@ "author": "", "license": "ISC", "dependencies": { - "@huggingface/inference": "^3.5.0", - "@huggingface/tasks": "^0.17.4", + "@huggingface/inference": "^3.6.1", + "@huggingface/tasks": "^0.18.1", "@types/node": "^22.5.0", "handlebars": "^4.7.8", "node": "^20.17.0", diff --git a/scripts/api-inference/pnpm-lock.yaml b/scripts/api-inference/pnpm-lock.yaml index 46a86a1bd..26e5e4574 100644 --- a/scripts/api-inference/pnpm-lock.yaml +++ b/scripts/api-inference/pnpm-lock.yaml @@ -9,11 +9,11 @@ importers: .: dependencies: '@huggingface/inference': - specifier: ^3.5.0 - version: 3.5.0 + specifier: ^3.6.1 + version: 3.6.1 '@huggingface/tasks': - specifier: ^0.17.4 - version: 0.17.4 + specifier: ^0.18.1 + version: 0.18.1 '@types/node': specifier: ^22.5.0 version: 22.5.0 @@ -189,12 +189,19 @@ packages: cpu: [x64] os: [win32] - '@huggingface/inference@3.5.0': - resolution: {integrity: sha512-5IKkI/HJDDWg5aVWyd60kj27L9Kwxyyvu64U1To4/HzsZj13flqv2rJMrT6OB0izvFwTfUN1SDrrA5OH3YbxQQ==} + '@huggingface/inference@3.6.1': + resolution: {integrity: sha512-EtQlbBqcZycPe+qiTEFI+wNHOMpG0gwNTaZSvYu1juN1p/1dEgqAb2GO31dxLgNev2PzH9d+9nm8GngOsIepJg==} engines: {node: '>=18'} - '@huggingface/tasks@0.17.4': - resolution: {integrity: sha512-LES7+OosthFKdqRL0e+bA2d4jfKmiQWuqahsPrv0+EsSZtdHdaZ3nje0f2g5wq4miHX4xWpBLuWJknjdnBwXsA==} + '@huggingface/jinja@0.3.3': + resolution: {integrity: sha512-vQQr2JyWvVFba3Lj9es4q9vCl1sAc74fdgnEMoX8qHrXtswap9ge9uO3ONDzQB0cQ0PUyaKY2N6HaVbTBvSXvw==} + engines: {node: '>=18'} + + '@huggingface/tasks@0.17.9': + resolution: {integrity: sha512-lV6RgCJkqy3p93FFxP9H4SGJmFcHAwr1FO+Zk56q/JWsf7Tdsel1DEo1Xfd3An7ZPWpc2Y9ldRecGo9efDYghg==} + + '@huggingface/tasks@0.18.1': + resolution: {integrity: sha512-HK6JTVB/nrgjOnbe77HFSENftfAp67AI4mHMR2x64Os1hvchuTT88M8fKEiyESSvqKFKwW4lQKkHva07p05AXw==} '@jridgewell/resolve-uri@3.1.2': resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==} @@ -411,11 +418,16 @@ snapshots: '@esbuild/win32-x64@0.23.1': optional: true - '@huggingface/inference@3.5.0': + '@huggingface/inference@3.6.1': dependencies: - '@huggingface/tasks': 0.17.4 + '@huggingface/jinja': 0.3.3 + '@huggingface/tasks': 0.17.9 + + '@huggingface/jinja@0.3.3': {} + + '@huggingface/tasks@0.17.9': {} - '@huggingface/tasks@0.17.4': {} + '@huggingface/tasks@0.18.1': {} '@jridgewell/resolve-uri@3.1.2': {} diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index 98254a84e..be80ccf09 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -1,5 +1,9 @@ import { snippets } from "@huggingface/inference"; -import { PipelineType, InferenceSnippet } from "@huggingface/tasks"; +import { + PipelineType, + InferenceSnippet, + type ModelDataMinimal, +} from "@huggingface/tasks"; import Handlebars from "handlebars"; import * as fs from "node:fs/promises"; import * as path from "node:path/posix"; @@ -28,7 +32,7 @@ const TASKS: PipelineType[] = [ const TASKS_EXTENDED = [...TASKS, "chat-completion"]; const SPECS_REVISION = "main"; -const inferenceSnippetLanguages = ["python", "js", "curl"] as const; +const inferenceSnippetLanguages = ["python", "js", "sh"] as const; type InferenceSnippetLanguage = (typeof inferenceSnippetLanguages)[number]; // 
Taken from https://stackoverflow.com/a/31632215 @@ -65,12 +69,12 @@ const TABLE_INDENT = NBSP.repeat(8); function readTemplate( templateName: string, - namespace: string, + namespace: string ): Promise { const templatePath = path.join( TEMPLATE_DIR, namespace, - `${templateName}.handlebars`, + `${templateName}.handlebars` ); console.log(` 🔍 Reading ${templateName}.handlebars`); return fs.readFile(templatePath, { encoding: "utf-8" }); @@ -84,7 +88,7 @@ function writeTaskDoc(templateName: string, content: string): Promise { return fs .mkdir(TASKS_DOCS_DIR, { recursive: true }) .then(() => - fs.writeFile(taskDocPath, contentWithHeader, { encoding: "utf-8" }), + fs.writeFile(taskDocPath, contentWithHeader, { encoding: "utf-8" }) ); } @@ -102,60 +106,12 @@ const TASKS_DATA = (await response.json()) as any; //// Snippet utils //// /////////////////////// -const formatSnippets = ( - result: InferenceSnippet | InferenceSnippet[], - defaultClient: string, - language: string, -): string => { - // For single snippet, just wrap with code block - if (!Array.isArray(result) || result.length === 1) { - const snippet = Array.isArray(result) ? result[0] : result; - return `\`\`\`${language}\n${snippet.content}\n\`\`\``; - } - - // For multiple snippets, add description and wrap each one - return result - .map((snippet) => { - const client = snippet.client || defaultClient; - return `Using \`${client}\`:\n\`\`\`${language}\n${snippet.content}\n\`\`\``; - }) - .join("\n\n"); -}; - -const GET_SNIPPET_FN = { - curl: (modelData: any, token: string) => { - const result = snippets.curl.getCurlInferenceSnippet( - modelData, - token, - "hf-inference", - ); - return formatSnippets(result, "curl", "bash"); - }, - js: (modelData: any, token: string) => { - const result = snippets.js.getJsInferenceSnippet( - modelData, - token, - "hf-inference", - ); - return formatSnippets(result, "javascript", "js"); - }, - python: (modelData: any, token: string) => { - const result = snippets.python.getPythonInferenceSnippet( - modelData, - token, - "hf-inference", - ); - return formatSnippets(result, "python", "py"); - }, -} as const; - -export function getInferenceSnippet( +export function getFormattedInferenceSnippet( id: string, pipeline_tag: PipelineType, - language: InferenceSnippetLanguage, config?: JsonObject, - tags?: string[], -): string | undefined { + tags?: string[] +): InferenceSnippet[] { const modelData = { id, pipeline_tag, @@ -163,12 +119,15 @@ export function getInferenceSnippet( library_name: "", config: config ?? {}, tags: tags ?? 
[], - }; + inference: "", + } as ModelDataMinimal; // @ts-ignore - const generatedSnippets = GET_SNIPPET_FN[language](modelData, "hf_***"); - if (generatedSnippets) { - return generatedSnippets; - } + return snippets.getInferenceSnippets( + modelData, + "hf_***", + "hf-inference", + modelData.id + ); } ///////////////////// @@ -178,13 +137,13 @@ export function getInferenceSnippet( type SpecNameType = "input" | "output" | "stream_output"; const SPECS_URL_TEMPLATE = Handlebars.compile( - `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json`, + `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json` ); const COMMON_DEFINITIONS_URL = `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/common-definitions.json`; async function fetchOneSpec( task: PipelineType, - name: SpecNameType, + name: SpecNameType ): Promise { const url = SPECS_URL_TEMPLATE({ task, name }); console.log(` 🕸️ Fetching ${task} ${name} specs`); @@ -194,7 +153,7 @@ async function fetchOneSpec( } async function fetchSpecs( - task: PipelineType, + task: PipelineType ): Promise< Record<"input" | "output" | "stream_output", JsonObject | undefined> > { @@ -232,7 +191,7 @@ function processPayloadSchema(schema: any): JsonObject[] { key: string, value: any, required: boolean, - parentPrefix: string, + parentPrefix: string ): void { const isRequired = required; let type = value.type || "unknown"; @@ -296,9 +255,9 @@ function processPayloadSchema(schema: any): JsonObject[] { nestedKey, nestedValue, nestedRequired, - parentPrefix + TABLE_INDENT, + parentPrefix + TABLE_INDENT ); - }, + } ); } else if (isArray) { // Process array items @@ -316,7 +275,7 @@ function processPayloadSchema(schema: any): JsonObject[] { `${NBSP}(#${index + 1})`, subSchema, false, - parentPrefix + TABLE_INDENT, + parentPrefix + TABLE_INDENT ); }); } @@ -358,20 +317,20 @@ For more details about the \`{{task}}\` task, check out its [dedicated page](htt `); const TIP_LIST_MODELS_LINK_TEMPLATE = Handlebars.compile( - `Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag={{task}}&sort=trending).`, + `Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag={{task}}&sort=trending).` ); const SPECS_HEADERS = await readTemplate("specs-headers", "common"); const PAGE_HEADER = Handlebars.compile( - await readTemplate("page-header", "common"), + await readTemplate("page-header", "common") ); const SNIPPETS_TEMPLATE = Handlebars.compile( - await readTemplate("snippets-template", "common"), + await readTemplate("snippets-template", "common") ); const SPECS_PAYLOAD_TEMPLATE = Handlebars.compile( - await readTemplate("specs-payload", "common"), + await readTemplate("specs-payload", "common") ); const SPECS_OUTPUT_TEMPLATE = Handlebars.compile( - await readTemplate("specs-output", "common"), + await readTemplate("specs-output", "common") ); //////////////////// @@ -382,7 +341,15 @@ const DATA: { constants: { specsHeaders: string; }; - models: Record; + recommendedModels: Record< + string, + { + id: string; + description: string; + inference: string | undefined; + config: JsonObject | undefined; + }[] + >; snippets: Record; specs: Record< string, @@ -400,7 +367,7 @@ const DATA: { constants: { specsHeaders: SPECS_HEADERS, }, - 
models: {}, + recommendedModels: {}, snippets: {}, specs: {}, tips: { linksToTaskPage: {}, listModelsLink: {} }, @@ -425,17 +392,17 @@ await Promise.all( const modelData = await fetch(url).then((res) => res.json()); model.inference = modelData.inference; model.config = modelData.config; - }, - ), + } + ) ); - }), + }) ); // Fetch recommended models TASKS.forEach((task) => { - DATA.models[task] = TASKS_DATA[task].models.filter( + DATA.recommendedModels[task] = TASKS_DATA[task].models.filter( (model: { inference: string }) => - ["cold", "loading", "warm"].includes(model.inference), + ["cold", "loading", "warm"].includes(model.inference) ); }); @@ -444,12 +411,8 @@ TASKS.forEach((task) => { TASKS.forEach((task) => { // Let's take as example the first available model that is recommended. // Otherwise, fallback to "". - const mainModel = DATA.models[task][0]?.id ?? ""; - const taskSnippets = { - curl: getInferenceSnippet(mainModel, task, "curl"), - python: getInferenceSnippet(mainModel, task, "python"), - javascript: getInferenceSnippet(mainModel, task, "js"), - }; + const mainModel = DATA.recommendedModels[task][0]?.id ?? ""; + const taskSnippets = getFormattedInferenceSnippet(mainModel, task); DATA.snippets[task] = SNIPPETS_TEMPLATE({ taskSnippets, taskSnakeCase: task.replaceAll("-", "_"), @@ -475,7 +438,7 @@ await Promise.all( }) : undefined, }; - }), + }) ); // Render tips @@ -505,39 +468,21 @@ function fetchChatCompletion() { conversationalTasks.forEach((task) => { // Recommended models based on the base task - DATA.models[task.name] = DATA.models[task.baseName].filter( + DATA.recommendedModels[task.name] = DATA.recommendedModels[ + task.baseName + ].filter( // @ts-ignore - (model) => model.config?.tokenizer_config?.chat_template, + (model) => model.config?.tokenizer_config?.chat_template ); - const mainModel = DATA.models[task.name][0]; + const mainModel = DATA.recommendedModels[task.name][0]; - const taskSnippets = { - // @ts-ignore - curl: getInferenceSnippet( - mainModel.id, - task.pipelineTag, - "curl", - mainModel.config, - ["conversational"], - ), - // @ts-ignore - python: getInferenceSnippet( - mainModel.id, - task.pipelineTag, - "python", - mainModel.config, - ["conversational"], - ), - // @ts-ignore - javascript: getInferenceSnippet( - mainModel.id, - task.pipelineTag, - "js", - mainModel.config, - ["conversational"], - ), - }; + const taskSnippets = getFormattedInferenceSnippet( + mainModel.id, + task.pipelineTag as PipelineType, + mainModel.config, + ["conversational"] + ); DATA.snippets[task.name] = SNIPPETS_TEMPLATE({ taskSnippets, taskSnakeCase: baseName.replaceAll("-", "_"), @@ -554,7 +499,7 @@ fetchChatCompletion(); async function renderTemplate( templateName: string, - data: JsonObject, + data: JsonObject ): Promise { console.log(`🎨 Rendering ${templateName}`); const template = Handlebars.compile(await readTemplate(templateName, "task")); @@ -566,7 +511,7 @@ await Promise.all( // @ts-ignore const rendered = await renderTemplate(task, DATA); await writeTaskDoc(task, rendered); - }), + }) ); console.log("✅ All done!"); diff --git a/scripts/api-inference/templates/common/snippets-template.handlebars b/scripts/api-inference/templates/common/snippets-template.handlebars index 09202f6ba..96c382ef5 100644 --- a/scripts/api-inference/templates/common/snippets-template.handlebars +++ b/scripts/api-inference/templates/common/snippets-template.handlebars @@ -1,31 +1,23 @@ -{{#if (or taskSnippets.curl taskSnippets.python taskSnippets.javascript)}} +{{#if taskSnippets.length }} -{{!-- 
cURL snippet (if exists) --}} -{{#if taskSnippets.curl}} - -{{{taskSnippets.curl}}} - -{{/if}} - -{{!-- Python snippet (if exists) --}} -{{#if taskSnippets.python}} - -{{{taskSnippets.python}}} +{{#each taskSnippets}} + -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}). - -{{/if}} +```{{this.language}} +{{{this.content}}} +``` -{{!-- JavaScript snippet (if exists) --}} -{{#if taskSnippets.javascript}} - -{{{taskSnippets.javascript}}} + -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#{{taskAttached}}). - +{{#if (eq this.client "huggingface_hub")}} +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}). +{{/if}} +{{#if (eq this.client "huggingface.js")}} +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#{{taskAttached}}). {{/if}} +{{/each}} From 83319426bcac57b3e66c7a0fe45b38a4225461d5 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 21 Mar 2025 15:43:55 +0100 Subject: [PATCH 2/6] better now? :) --- .../tasks/audio-classification.md | 81 +-- .../tasks/automatic-speech-recognition.md | 126 ++++- docs/api-inference/tasks/chat-completion.md | 465 ++++++++++++++++-- .../api-inference/tasks/feature-extraction.md | 26 +- docs/api-inference/tasks/fill-mask.md | 103 +--- .../tasks/image-classification.md | 28 +- .../api-inference/tasks/image-segmentation.md | 22 +- .../api-inference/tasks/image-text-to-text.md | 376 +++++++++++++- docs/api-inference/tasks/image-to-image.md | 10 +- docs/api-inference/tasks/object-detection.md | 13 +- .../api-inference/tasks/question-answering.md | 28 +- docs/api-inference/tasks/summarization.md | 17 +- .../tasks/table-question-answering.md | 152 +----- .../tasks/text-classification.md | 30 +- docs/api-inference/tasks/text-generation.md | 134 ++++- docs/api-inference/tasks/text-to-image.md | 304 +++++++++++- .../tasks/token-classification.md | 37 +- docs/api-inference/tasks/translation.md | 27 +- .../tasks/zero-shot-classification.md | 10 +- scripts/api-inference/scripts/generate.ts | 291 ++++++++--- .../common/snippets-template.handlebars | 7 +- .../task/image-text-to-text.handlebars | 4 +- .../task/token-classification.handlebars | 8 - 23 files changed, 1662 insertions(+), 637 deletions(-) diff --git a/docs/api-inference/tasks/audio-classification.md b/docs/api-inference/tasks/audio-classification.md index 0975ca21e..a7e5d55d8 100644 --- a/docs/api-inference/tasks/audio-classification.md +++ b/docs/api-inference/tasks/audio-classification.md @@ -29,92 +29,13 @@ For more details about the `audio-classification` task, check out its [dedicated ### Recommended models -- [speechbrain/google_speech_command_xvector](https://huggingface.co/speechbrain/google_speech_command_xvector): An easy-to-use model for command recognition. -- [ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition](https://huggingface.co/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition): An emotion recognition model. -- [facebook/mms-lid-126](https://huggingface.co/facebook/mms-lid-126): A language identification model. 
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=audio-classification&sort=trending). ### Using the API - - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -output = client.audio_classification("sample1.flac", model="speechbrain/google_speech_command_xvector") -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector" -headers = {"Authorization": "Bearer hf_***"} - -def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers={"Content-Type": "audio/flac", **headers}, data=data) - return response.json() - -output = query("sample1.flac") -``` - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "audio/flac" - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "sample1.flac" }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - -```sh -curl https://router.huggingface.co/hf-inference/models/speechbrain/google_speech_command_xvector \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: audio/flac' \ - --data-binary @"sample1.flac" -``` - - - - - +No snippet available for this task. diff --git a/docs/api-inference/tasks/automatic-speech-recognition.md b/docs/api-inference/tasks/automatic-speech-recognition.md index bdac69319..cf50cb2c8 100644 --- a/docs/api-inference/tasks/automatic-speech-recognition.md +++ b/docs/api-inference/tasks/automatic-speech-recognition.md @@ -29,8 +29,6 @@ For more details about the `automatic-speech-recognition` task, check out its [d ### Recommended models -- [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3): A powerful ASR model by OpenAI. -- [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/seamless-m4t-v2-large): An end-to-end model that performs ASR and Speech Translation by MetaAI. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=automatic-speech-recognition&sort=trending). @@ -39,8 +37,108 @@ Explore all available models and find the one that suits you best [here](https:/ + + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="fal-ai", + api_key="hf_***", +) + +output = client.automatic_speech_recognition("sample1.flac", model="openai/whisper-large-v3") +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ + + +```python +import requests + +API_URL = "https://router.huggingface.co/fal-ai/fal-ai/whisper" +headers = {"Authorization": "Bearer hf_***"} + +def query(filename): + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers={"Content-Type": "audio/flac", **headers}, data=data) + return response.json() + +output = query("sample1.flac") +``` + + + + + + +```js +async function query(data) { + const response = await fetch( + "https://router.huggingface.co/fal-ai/fal-ai/whisper", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "audio/flac" + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({ inputs: "sample1.flac" }).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const data = fs.readFileSync("sample1.flac"); + +const output = await client.automaticSpeechRecognition({ + data, + model: "openai/whisper-large-v3", + provider: "fal-ai", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + + +```sh +curl https://router.huggingface.co/fal-ai/fal-ai/whisper \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: audio/flac' \ + --data-binary @"sample1.flac" +``` + + + + - + ```python from huggingface_hub import InferenceClient @@ -49,18 +147,19 @@ client = InferenceClient( api_key="hf_***", ) -output = client.automatic_speech_recognition("sample1.flac", model="openai/whisper-large-v3") +output = client.automatic_speech_recognition("sample1.flac", model="openai/whisper-large-v3-turbo") ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3" +API_URL = "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo" headers = {"Authorization": "Bearer hf_***"} def query(filename): @@ -74,12 +173,13 @@ output = query("sample1.flac") + - + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3", + "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo", { headers: { Authorization: "Bearer hf_***", @@ -100,8 +200,9 @@ query({ inputs: "sample1.flac" }).then((response) => { + - + ```js import { InferenceClient } from "@huggingface/inference"; @@ -111,7 +212,7 @@ const data = fs.readFileSync("sample1.flac"); const output = await client.automaticSpeechRecognition({ data, - model: "openai/whisper-large-v3", + model: "openai/whisper-large-v3-turbo", provider: "hf-inference", }); @@ -121,10 +222,11 @@ console.log(output); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
+ - + ```sh -curl https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3 \ +curl https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: audio/flac' \ diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md index ca0b07e33..e376a1c09 100644 --- a/docs/api-inference/tasks/chat-completion.md +++ b/docs/api-inference/tasks/chat-completion.md @@ -21,16 +21,9 @@ This is a subtask of [`text-generation`](https://huggingface.co/docs/api-inferen #### Conversational Large Language Models (LLMs) -- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. -- [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models. -- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. -- [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft. -- [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code. -- [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model. #### Conversational Vision-Language Models (VLMs) -- [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct): Strong image-text-to-text model. ### API Playground @@ -60,8 +53,9 @@ The API supports: + - + ```python from huggingface_hub import InferenceClient @@ -71,7 +65,7 @@ client = InferenceClient( ) completion = client.chat.completions.create( - model="google/gemma-2-2b-it", + model="Qwen/QwQ-32B", messages=[ { "role": "user", @@ -87,12 +81,13 @@ print(completion.choices[0].message) To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1/chat/completions" +API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1/chat/completions" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -107,7 +102,7 @@ response = query({ } ], "max_tokens": 500, - "model": "google/gemma-2-2b-it" + "model": "Qwen/QwQ-32B" }) print(response["choices"][0]["message"]) @@ -115,18 +110,19 @@ print(response["choices"][0]["message"]) + - + ```python from openai import OpenAI client = OpenAI( - base_url="https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1", + base_url="https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1", api_key="hf_***" ) completion = client.chat.completions.create( - model="google/gemma-2-2b-it", + model="Qwen/QwQ-32B", messages=[ { "role": "user", @@ -141,8 +137,9 @@ print(completion.choices[0].message) + - + ```js import { InferenceClient } from "@huggingface/inference"; @@ -150,7 +147,7 @@ const client = new InferenceClient("hf_***"); const chatCompletion = await client.chatCompletion({ provider: "hf-inference", - model: "google/gemma-2-2b-it", + model: "Qwen/QwQ-32B", messages: [ { role: "user", @@ -166,18 +163,19 @@ console.log(chatCompletion.choices[0].message); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```js import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1", + baseURL: "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1", apiKey: "hf_***", }); const chatCompletion = await client.chat.completions.create({ - model: "google/gemma-2-2b-it", + model: "Qwen/QwQ-32B", messages: [ { role: "user", @@ -192,10 +190,11 @@ console.log(chatCompletion.choices[0].message); + - + ```sh -curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1/chat/completions \ +curl https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1/chat/completions \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ -d '{ @@ -206,7 +205,166 @@ curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1/c } ], "max_tokens": 500, - "model": "google/gemma-2-2b-it", + "model": "Qwen/QwQ-32B", + "stream": false + }' +``` + + + + + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="together", + api_key="hf_***", +) + +completion = client.chat.completions.create( + model="deepseek-ai/DeepSeek-R1", + messages=[ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + max_tokens=500, +) + +print(completion.choices[0].message) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + + +```python +import requests + +API_URL = "https://router.huggingface.co/together/v1/chat/completions" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +response = query({ + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" 
+ } + ], + "max_tokens": 500, + "model": "deepseek-ai/DeepSeek-R1" +}) + +print(response["choices"][0]["message"]) +``` + + + + + + +```python +from openai import OpenAI + +client = OpenAI( + base_url="https://router.huggingface.co/together/v1", + api_key="hf_***" +) + +completion = client.chat.completions.create( + model="deepseek-ai/DeepSeek-R1", + messages=[ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + max_tokens=500, +) + +print(completion.choices[0].message) +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const chatCompletion = await client.chatCompletion({ + provider: "together", + model: "deepseek-ai/DeepSeek-R1", + messages: [ + { + role: "user", + content: "What is the capital of France?", + }, + ], + max_tokens: 500, +}); + +console.log(chatCompletion.choices[0].message); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + + +```js +import { OpenAI } from "openai"; + +const client = new OpenAI({ + baseURL: "https://router.huggingface.co/together/v1", + apiKey: "hf_***", +}); + +const chatCompletion = await client.chat.completions.create({ + model: "deepseek-ai/DeepSeek-R1", + messages: [ + { + role: "user", + content: "What is the capital of France?", + }, + ], + max_tokens: 500, +}); + +console.log(chatCompletion.choices[0].message); +``` + + + + + + +```sh +curl https://router.huggingface.co/together/v1/chat/completions \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_tokens": 500, + "model": "deepseek-ai/DeepSeek-R1", "stream": false }' ``` @@ -223,8 +381,9 @@ curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it/v1/c + - + ```python from huggingface_hub import InferenceClient @@ -234,7 +393,7 @@ client = InferenceClient( ) completion = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", + model="google/gemma-3-27b-it", messages=[ { "role": "user", @@ -261,12 +420,13 @@ print(completion.choices[0].message) To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions" +API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -292,7 +452,7 @@ response = query({ } ], "max_tokens": 500, - "model": "Qwen/Qwen2.5-VL-7B-Instruct" + "model": "google/gemma-3-27b-it" }) print(response["choices"][0]["message"]) @@ -300,18 +460,19 @@ print(response["choices"][0]["message"]) + - + ```python from openai import OpenAI client = OpenAI( - base_url="https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1", + base_url="https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1", api_key="hf_***" ) completion = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", + model="google/gemma-3-27b-it", messages=[ { "role": "user", @@ -337,8 +498,9 @@ print(completion.choices[0].message) + - + ```js import { InferenceClient } from "@huggingface/inference"; @@ -346,7 +508,7 @@ const client = new InferenceClient("hf_***"); const chatCompletion = await client.chatCompletion({ provider: "hf-inference", - model: "Qwen/Qwen2.5-VL-7B-Instruct", + model: "google/gemma-3-27b-it", messages: [ { role: "user", @@ -373,18 +535,19 @@ console.log(chatCompletion.choices[0].message); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```js import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1", + baseURL: "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1", apiKey: "hf_***", }); const chatCompletion = await client.chat.completions.create({ - model: "Qwen/Qwen2.5-VL-7B-Instruct", + model: "google/gemma-3-27b-it", messages: [ { role: "user", @@ -410,10 +573,236 @@ console.log(chatCompletion.choices[0].message); + - + +```sh +curl https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + "max_tokens": 500, + "model": "google/gemma-3-27b-it", + "stream": false + }' +``` + + + + + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hyperbolic", + api_key="hf_***", +) + +completion = client.chat.completions.create( + model="Qwen/Qwen2.5-VL-7B-Instruct", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens=500, +) + +print(completion.choices[0].message) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ + + +```python +import requests + +API_URL = "https://router.huggingface.co/hyperbolic/v1/chat/completions" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +response = query({ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + "max_tokens": 500, + "model": "Qwen/Qwen2.5-VL-7B-Instruct" +}) + +print(response["choices"][0]["message"]) +``` + + + + + + +```python +from openai import OpenAI + +client = OpenAI( + base_url="https://router.huggingface.co/hyperbolic/v1", + api_key="hf_***" +) + +completion = client.chat.completions.create( + model="Qwen/Qwen2.5-VL-7B-Instruct", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens=500, +) + +print(completion.choices[0].message) +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const chatCompletion = await client.chatCompletion({ + provider: "hyperbolic", + model: "Qwen/Qwen2.5-VL-7B-Instruct", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence.", + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", + }, + }, + ], + }, + ], + max_tokens: 500, +}); + +console.log(chatCompletion.choices[0].message); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + + +```js +import { OpenAI } from "openai"; + +const client = new OpenAI({ + baseURL: "https://router.huggingface.co/hyperbolic/v1", + apiKey: "hf_***", +}); + +const chatCompletion = await client.chat.completions.create({ + model: "Qwen/Qwen2.5-VL-7B-Instruct", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence.", + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", + }, + }, + ], + }, + ], + max_tokens: 500, +}); + +console.log(chatCompletion.choices[0].message); +``` + + + + + + ```sh -curl https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions \ +curl https://router.huggingface.co/hyperbolic/v1/chat/completions \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ -d '{ diff --git a/docs/api-inference/tasks/feature-extraction.md b/docs/api-inference/tasks/feature-extraction.md index f86638120..b246abe87 100644 --- a/docs/api-inference/tasks/feature-extraction.md +++ b/docs/api-inference/tasks/feature-extraction.md @@ -29,7 +29,6 @@ For more details about the `feature-extraction` task, check out its [dedicated p ### Recommended models -- [thenlper/gte-large](https://huggingface.co/thenlper/gte-large): A powerful feature extraction model for natural language processing tasks. 
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=feature-extraction&sort=trending). @@ -38,8 +37,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -50,19 +50,20 @@ client = InferenceClient( result = client.feature_extraction( inputs="Today is a sunny day and I will get some ice cream.", - model="thenlper/gte-large", + model="mixedbread-ai/mxbai-embed-large-v1", ) ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/thenlper/gte-large" +API_URL = "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/mixedbread-ai/mxbai-embed-large-v1" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -76,12 +77,13 @@ output = query({ + - + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/thenlper/gte-large", + "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/mixedbread-ai/mxbai-embed-large-v1", { headers: { Authorization: "Bearer hf_***", @@ -102,15 +104,16 @@ query({ inputs: "Today is a sunny day and I will get some ice cream." }).then((r + - + ```js import { InferenceClient } from "@huggingface/inference"; const client = new InferenceClient("hf_***"); const output = await client.featureExtraction({ - model: "thenlper/gte-large", + model: "mixedbread-ai/mxbai-embed-large-v1", inputs: "Today is a sunny day and I will get some ice cream.", provider: "hf-inference", }); @@ -121,10 +124,11 @@ console.log(output); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```sh -curl https://router.huggingface.co/hf-inference/pipeline/feature-extraction/thenlper/gte-large \ +curl https://router.huggingface.co/hf-inference/pipeline/feature-extraction/mixedbread-ai/mxbai-embed-large-v1 \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md index dd59cc641..5eac91ac9 100644 --- a/docs/api-inference/tasks/fill-mask.md +++ b/docs/api-inference/tasks/fill-mask.md @@ -24,114 +24,13 @@ For more details about the `fill-mask` task, check out its [dedicated page](http ### Recommended models -- [FacebookAI/xlm-roberta-base](https://huggingface.co/FacebookAI/xlm-roberta-base): A multilingual model trained on 100 languages. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=fill-mask&sort=trending). ### Using the API - - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.fill_mask( - inputs="The answer to the universe is [MASK].", - model="FacebookAI/xlm-roberta-base", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "The answer to the universe is [MASK].", -}) -``` - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "The answer to the universe is [MASK]." }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.fillMask({ - model: "FacebookAI/xlm-roberta-base", - inputs: "The answer to the universe is [MASK].", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - -```sh -curl https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-base \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "\"The answer to the universe is [MASK].\"" - }' -``` - - - - - +No snippet available for this task. diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md index 457d01bb4..c71575230 100644 --- a/docs/api-inference/tasks/image-classification.md +++ b/docs/api-inference/tasks/image-classification.md @@ -24,9 +24,6 @@ For more details about the `image-classification` task, check out its [dedicated ### Recommended models -- [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224): A strong image classification model. -- [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224): A robust image classification model. -- [facebook/convnext-large-224](https://huggingface.co/facebook/convnext-large-224): A strong image classification model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-classification&sort=trending). @@ -35,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -45,18 +43,19 @@ client = InferenceClient( api_key="hf_***", ) -output = client.image_classification("cats.jpg", model="google/vit-base-patch16-224") +output = client.image_classification("cats.jpg", model="Falconsai/nsfw_image_detection") ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
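To make the result tangible, here is a rough sketch of reading the highest-scoring prediction, assuming `output` is the list returned by `client.image_classification()` above and that each element exposes the standard `label`/`score` fields:

```python
# Pick the highest-scoring class from the predictions returned above
top = max(output, key=lambda prediction: prediction.score)
print(f"{top.label}: {top.score:.3f}")
```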
+ - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224" +API_URL = "https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection" headers = {"Authorization": "Bearer hf_***"} def query(filename): @@ -70,12 +69,13 @@ output = query("cats.jpg") + - + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224", + "https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection", { headers: { Authorization: "Bearer hf_***", @@ -96,8 +96,9 @@ query({ inputs: "cats.jpg" }).then((response) => { + - + ```js import { InferenceClient } from "@huggingface/inference"; @@ -107,7 +108,7 @@ const data = fs.readFileSync("cats.jpg"); const output = await client.imageClassification({ data, - model: "google/vit-base-patch16-224", + model: "Falconsai/nsfw_image_detection", provider: "hf-inference", }); @@ -117,10 +118,11 @@ console.log(output); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```sh -curl https://router.huggingface.co/hf-inference/models/google/vit-base-patch16-224 \ +curl https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: image/jpeg' \ diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md index 8d3f0117e..59be54262 100644 --- a/docs/api-inference/tasks/image-segmentation.md +++ b/docs/api-inference/tasks/image-segmentation.md @@ -24,8 +24,6 @@ For more details about the `image-segmentation` task, check out its [dedicated p ### Recommended models -- [openmmlab/upernet-convnext-small](https://huggingface.co/openmmlab/upernet-convnext-small): Solid semantic segmentation model trained on ADE20k. -- [facebook/mask2former-swin-large-coco-panoptic](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic): Panoptic segmentation model trained on the COCO (common objects) dataset. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending). @@ -34,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -44,18 +43,19 @@ client = InferenceClient( api_key="hf_***", ) -output = client.image_segmentation("cats.jpg", model="openmmlab/upernet-convnext-small") +output = client.image_segmentation("cats.jpg", model="jonathandinu/face-parsing") ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
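Along the same lines, a small sketch of listing the detected segments, assuming `output` is the list returned by `client.image_segmentation()` above; only the `label` and `score` fields are used here, since how the `mask` is represented can vary between client versions:

```python
# List the segments found in the image (the mask itself is not touched here)
for segment in output:
    print(segment.label, segment.score)
```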
+ - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small" +API_URL = "https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing" headers = {"Authorization": "Bearer hf_***"} def query(filename): @@ -69,12 +69,13 @@ output = query("cats.jpg") + - + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small", + "https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing", { headers: { Authorization: "Bearer hf_***", @@ -95,10 +96,11 @@ query({ inputs: "cats.jpg" }).then((response) => { + - + ```sh -curl https://router.huggingface.co/hf-inference/models/openmmlab/upernet-convnext-small \ +curl https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: image/jpeg' \ diff --git a/docs/api-inference/tasks/image-text-to-text.md b/docs/api-inference/tasks/image-text-to-text.md index bf1c401a1..f4ca691e0 100644 --- a/docs/api-inference/tasks/image-text-to-text.md +++ b/docs/api-inference/tasks/image-text-to-text.md @@ -24,7 +24,6 @@ For more details about the `image-text-to-text` task, check out its [dedicated p ### Recommended models -- [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct): Strong image-text-to-text model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-text-to-text&sort=trending). @@ -33,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -44,8 +44,25 @@ client = InferenceClient( ) completion = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", - inputs="\"Can you please let us know more details about your \"", + model="google/gemma-3-27b-it", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens=500, ) print(completion.choices[0].message) @@ -54,12 +71,13 @@ print(completion.choices[0].message) To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions" +API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -67,8 +85,25 @@ def query(payload): return response.json() response = query({ - "inputs": "\"Can you please let us know more details about your \"", - "model": "Qwen/Qwen2.5-VL-7B-Instruct" + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." 
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + "max_tokens": 500, + "model": "google/gemma-3-27b-it" }) print(response["choices"][0]["message"]) @@ -76,19 +111,37 @@ print(response["choices"][0]["message"]) + - + ```python from openai import OpenAI client = OpenAI( - base_url="https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1", + base_url="https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1", api_key="hf_***" ) completion = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", - inputs="\"Can you please let us know more details about your \"", + model="google/gemma-3-27b-it", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens=500, ) print(completion.choices[0].message) @@ -96,8 +149,9 @@ print(completion.choices[0].message) + - + ```js import { InferenceClient } from "@huggingface/inference"; @@ -105,8 +159,25 @@ const client = new InferenceClient("hf_***"); const chatCompletion = await client.chatCompletion({ provider: "hf-inference", - model: "Qwen/Qwen2.5-VL-7B-Instruct", - inputs: "\"Can you please let us know more details about your \"", + model: "google/gemma-3-27b-it", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence.", + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", + }, + }, + ], + }, + ], + max_tokens: 500, }); console.log(chatCompletion.choices[0].message); @@ -115,19 +186,37 @@ console.log(chatCompletion.choices[0].message); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```js import { OpenAI } from "openai"; const client = new OpenAI({ - baseURL: "https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1", + baseURL: "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1", apiKey: "hf_***", }); const chatCompletion = await client.chat.completions.create({ - model: "Qwen/Qwen2.5-VL-7B-Instruct", - inputs: "\"Can you please let us know more details about your \"", + model: "google/gemma-3-27b-it", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence.", + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", + }, + }, + ], + }, + ], + max_tokens: 500, }); console.log(chatCompletion.choices[0].message); @@ -135,14 +224,257 @@ console.log(chatCompletion.choices[0].message); + - + +```sh +curl https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." 
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + "max_tokens": 500, + "model": "google/gemma-3-27b-it", + "stream": false + }' +``` + + + + + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hyperbolic", + api_key="hf_***", +) + +completion = client.chat.completions.create( + model="Qwen/Qwen2.5-VL-7B-Instruct", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens=500, +) + +print(completion.choices[0].message) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + + +```python +import requests + +API_URL = "https://router.huggingface.co/hyperbolic/v1/chat/completions" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +response = query({ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + "max_tokens": 500, + "model": "Qwen/Qwen2.5-VL-7B-Instruct" +}) + +print(response["choices"][0]["message"]) +``` + + + + + + +```python +from openai import OpenAI + +client = OpenAI( + base_url="https://router.huggingface.co/hyperbolic/v1", + api_key="hf_***" +) + +completion = client.chat.completions.create( + model="Qwen/Qwen2.5-VL-7B-Instruct", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens=500, +) + +print(completion.choices[0].message) +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const chatCompletion = await client.chatCompletion({ + provider: "hyperbolic", + model: "Qwen/Qwen2.5-VL-7B-Instruct", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence.", + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", + }, + }, + ], + }, + ], + max_tokens: 500, +}); + +console.log(chatCompletion.choices[0].message); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
+ + + +```js +import { OpenAI } from "openai"; + +const client = new OpenAI({ + baseURL: "https://router.huggingface.co/hyperbolic/v1", + apiKey: "hf_***", +}); + +const chatCompletion = await client.chat.completions.create({ + model: "Qwen/Qwen2.5-VL-7B-Instruct", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence.", + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", + }, + }, + ], + }, + ], + max_tokens: 500, +}); + +console.log(chatCompletion.choices[0].message); +``` + + + + + + ```sh -curl https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions \ +curl https://router.huggingface.co/hyperbolic/v1/chat/completions \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ -d '{ - "inputs": "\"Can you please let us know more details about your \"", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + "max_tokens": 500, "model": "Qwen/Qwen2.5-VL-7B-Instruct", "stream": false }' diff --git a/docs/api-inference/tasks/image-to-image.md b/docs/api-inference/tasks/image-to-image.md index 845731ba8..9596da927 100644 --- a/docs/api-inference/tasks/image-to-image.md +++ b/docs/api-inference/tasks/image-to-image.md @@ -37,8 +37,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -51,20 +52,21 @@ client = InferenceClient( image = client.image_to_image( "cat.png", prompt="Turn the cat into a tiger.", - model="", + model="stabilityai/stable-diffusion-xl-refiner-1.0", ) ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + - + ```python import base64 import requests -API_URL = "https://router.huggingface.co/hf-inference/models/" +API_URL = "https://router.huggingface.co/hf-inference/models/stabilityai/stable-diffusion-xl-refiner-1.0" headers = {"Authorization": "Bearer hf_***"} def query(payload): diff --git a/docs/api-inference/tasks/object-detection.md b/docs/api-inference/tasks/object-detection.md index 544d9a6af..07ae70e02 100644 --- a/docs/api-inference/tasks/object-detection.md +++ b/docs/api-inference/tasks/object-detection.md @@ -24,7 +24,6 @@ For more details about the `object-detection` task, check out its [dedicated pag ### Recommended models -- [facebook/detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50): Solid object detection model pre-trained on the COCO 2017 dataset. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=object-detection&sort=trending). @@ -33,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -49,8 +49,9 @@ output = client.object_detection("cats.jpg", model="facebook/detr-resnet-50") To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
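As a quick way to inspect the detections, the sketch below walks over the result, assuming `output` comes from the `client.object_detection()` call above and that each element carries the usual `label`, `score` and `box` fields:

```python
# Report every detected object together with its bounding box
for detection in output:
    box = detection.box
    print(
        f"{detection.label} ({detection.score:.2f}): "
        f"x={box.xmin}..{box.xmax}, y={box.ymin}..{box.ymax}"
    )
```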
+ - + ```python import requests @@ -68,8 +69,9 @@ output = query("cats.jpg") + - + ```js async function query(data) { const response = await fetch( @@ -94,8 +96,9 @@ query({ inputs: "cats.jpg" }).then((response) => { + - + ```sh curl https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50 \ -X POST \ diff --git a/docs/api-inference/tasks/question-answering.md b/docs/api-inference/tasks/question-answering.md index 56ccf9033..2ff3fded3 100644 --- a/docs/api-inference/tasks/question-answering.md +++ b/docs/api-inference/tasks/question-answering.md @@ -24,9 +24,6 @@ For more details about the `question-answering` task, check out its [dedicated p ### Recommended models -- [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2): A robust baseline model for most question answering domains. -- [distilbert/distilbert-base-cased-distilled-squad](https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad): Small yet robust model that can answer questions. -- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A special model that can answer questions from tables. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=question-answering&sort=trending). @@ -35,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -50,19 +48,20 @@ result = client.question_answering( "question": "What is my name?", "context": "My name is Clara and I live in Berkeley." }, - model="deepset/roberta-base-squad2", + model="distilbert/distilbert-base-cased-distilled-squad", ) ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2" +API_URL = "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-cased-distilled-squad" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -79,12 +78,13 @@ output = query({ + - + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2", + "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-cased-distilled-squad", { headers: { Authorization: "Bearer hf_***", @@ -108,15 +108,16 @@ query({ inputs: { + - + ```js import { InferenceClient } from "@huggingface/inference"; const client = new InferenceClient("hf_***"); const output = await client.questionAnswering({ - model: "deepset/roberta-base-squad2", + model: "distilbert/distilbert-base-cased-distilled-squad", inputs: { "question": "What is my name?", "context": "My name is Clara and I live in Berkeley." @@ -130,10 +131,11 @@ console.log(output); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
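To read the span that was extracted, a minimal sketch assuming `result` is the object returned by `client.question_answering()` above, with the standard `answer`, `score`, `start` and `end` fields:

```python
# Print the extracted answer and where it sits in the context
print(f"answer={result.answer!r} score={result.score:.3f} span=({result.start}, {result.end})")
```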
+ - + ```sh -curl https://router.huggingface.co/hf-inference/models/deepset/roberta-base-squad2 \ +curl https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-cased-distilled-squad \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ diff --git a/docs/api-inference/tasks/summarization.md b/docs/api-inference/tasks/summarization.md index daf273ea4..161fe3ed2 100644 --- a/docs/api-inference/tasks/summarization.md +++ b/docs/api-inference/tasks/summarization.md @@ -24,8 +24,6 @@ For more details about the `summarization` task, check out its [dedicated page]( ### Recommended models -- [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn): A strong summarization model trained on English news articles. Excels at generating factual summaries. -- [Falconsai/medical_summarization](https://huggingface.co/Falconsai/medical_summarization): A summarization model trained on medical articles. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=summarization&sort=trending). @@ -34,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -53,8 +52,9 @@ result = client.summarization( To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + - + ```python import requests @@ -72,8 +72,9 @@ output = query({ + - + ```js async function query(data) { const response = await fetch( @@ -98,8 +99,9 @@ query({ inputs: "The tower is 324 metres (1,063 ft) tall, about the same height + - + ```js import { InferenceClient } from "@huggingface/inference"; @@ -117,8 +119,9 @@ console.log(output); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```sh curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn \ -X POST \ diff --git a/docs/api-inference/tasks/table-question-answering.md b/docs/api-inference/tasks/table-question-answering.md index 8e2357db2..76e68637a 100644 --- a/docs/api-inference/tasks/table-question-answering.md +++ b/docs/api-inference/tasks/table-question-answering.md @@ -24,163 +24,13 @@ For more details about the `table-question-answering` task, check out its [dedic ### Recommended models -- [microsoft/tapex-base](https://huggingface.co/microsoft/tapex-base): A table question answering model that is capable of neural SQL execution, i.e., employ TAPEX to execute a SQL query on a given table. -- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A robust table question answering model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=table-question-answering&sort=trending). 
### Using the API - - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.table_question_answering( - inputs={ - "query": "How many stars does the transformers repository have?", - "table": { - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": [ - "Python", - "Python", - "Rust, Python and NodeJS" - ] - } -}, - model="microsoft/tapex-base", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/microsoft/tapex-base" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": { - "query": "How many stars does the transformers repository have?", - "table": { - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": [ - "Python", - "Python", - "Rust, Python and NodeJS" - ] - } -}, -}) -``` - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/microsoft/tapex-base", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: { - "query": "How many stars does the transformers repository have?", - "table": { - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": [ - "Python", - "Python", - "Rust, Python and NodeJS" - ] - } -} }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.tableQuestionAnswering({ - model: "microsoft/tapex-base", - inputs: { - "query": "How many stars does the transformers repository have?", - "table": { - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": [ - "Python", - "Python", - "Rust, Python and NodeJS" - ] - } -}, - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
- - -```sh -curl https://router.huggingface.co/hf-inference/models/microsoft/tapex-base \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "{\n\t\"query\": \"How many stars does the transformers repository have?\",\n\t\"table\": {\n\t\t\"Repository\": [\"Transformers\", \"Datasets\", \"Tokenizers\"],\n\t\t\"Stars\": [\"36542\", \"4512\", \"3934\"],\n\t\t\"Contributors\": [\"651\", \"77\", \"34\"],\n\t\t\"Programming language\": [\n\t\t\t\"Python\",\n\t\t\t\"Python\",\n\t\t\t\"Rust, Python and NodeJS\"\n\t\t]\n\t}\n}" - }' -``` - - - - - +No snippet available for this task. diff --git a/docs/api-inference/tasks/text-classification.md b/docs/api-inference/tasks/text-classification.md index befb504ad..4aba37365 100644 --- a/docs/api-inference/tasks/text-classification.md +++ b/docs/api-inference/tasks/text-classification.md @@ -24,11 +24,6 @@ For more details about the `text-classification` task, check out its [dedicated ### Recommended models -- [distilbert/distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english): A robust model trained for sentiment analysis. -- [ProsusAI/finbert](https://huggingface.co/ProsusAI/finbert): A sentiment analysis model specialized in financial sentiment. -- [cardiffnlp/twitter-roberta-base-sentiment-latest](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest): A sentiment analysis model specialized in analyzing tweets. -- [papluca/xlm-roberta-base-language-detection](https://huggingface.co/papluca/xlm-roberta-base-language-detection): A model that can classify languages. -- [meta-llama/Prompt-Guard-86M](https://huggingface.co/meta-llama/Prompt-Guard-86M): A model that can classify text generation attacks. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-classification&sort=trending). @@ -37,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -49,19 +45,20 @@ client = InferenceClient( result = client.text_classification( inputs="I like you. I love you", - model="distilbert/distilbert-base-uncased-finetuned-sst-2-english", + model="ProsusAI/finbert", ) ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english" +API_URL = "https://router.huggingface.co/hf-inference/models/ProsusAI/finbert" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -75,12 +72,13 @@ output = query({ + - + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english", + "https://router.huggingface.co/hf-inference/models/ProsusAI/finbert", { headers: { Authorization: "Bearer hf_***", @@ -101,15 +99,16 @@ query({ inputs: "I like you. I love you" }).then((response) => { + - + ```js import { InferenceClient } from "@huggingface/inference"; const client = new InferenceClient("hf_***"); const output = await client.textClassification({ - model: "distilbert/distilbert-base-uncased-finetuned-sst-2-english", + model: "ProsusAI/finbert", inputs: "I like you. 
I love you", provider: "hf-inference", }); @@ -120,10 +119,11 @@ console.log(output); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```sh -curl https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english \ +curl https://router.huggingface.co/hf-inference/models/ProsusAI/finbert \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ diff --git a/docs/api-inference/tasks/text-generation.md b/docs/api-inference/tasks/text-generation.md index 9df66dbed..0c5da2f63 100644 --- a/docs/api-inference/tasks/text-generation.md +++ b/docs/api-inference/tasks/text-generation.md @@ -26,12 +26,6 @@ For more details about the `text-generation` task, check out its [dedicated page ### Recommended models -- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. -- [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models. -- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. -- [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft. -- [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code. -- [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending). @@ -40,8 +34,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -52,19 +47,20 @@ client = InferenceClient( result = client.text_generation( inputs="Can you please let us know more details about your ", - model="google/gemma-2-2b-it", + model="Qwen/QwQ-32B", ) ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
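The same call accepts the usual generation parameters. A minimal sketch, assuming the client from the snippet above; the values are illustrative only:

```python
# Same request with an explicit token budget and sampling temperature
result = client.text_generation(
    "Can you please let us know more details about your ",
    model="Qwen/QwQ-32B",
    max_new_tokens=200,
    temperature=0.7,
)
print(result)
```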
+ - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it" +API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -78,12 +74,13 @@ output = query({ + - + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it", + "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B", { headers: { Authorization: "Bearer hf_***", @@ -104,15 +101,16 @@ query({ inputs: "Can you please let us know more details about your " }).then((r + - + ```js import { InferenceClient } from "@huggingface/inference"; const client = new InferenceClient("hf_***"); const output = await client.textGeneration({ - model: "google/gemma-2-2b-it", + model: "Qwen/QwQ-32B", inputs: "Can you please let us know more details about your ", provider: "hf-inference", }); @@ -123,10 +121,11 @@ console.log(output); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```sh -curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it \ +curl https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ @@ -138,6 +137,109 @@ curl https://router.huggingface.co/hf-inference/models/google/gemma-2-2b-it \ + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="together", + api_key="hf_***", +) + +result = client.text_generation( + inputs="Can you please let us know more details about your ", + model="deepseek-ai/DeepSeek-R1", +) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + + +```python +import requests + +API_URL = "https://router.huggingface.co/together/v1/completions" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "Can you please let us know more details about your ", +}) +``` + + + + + + +```js +async function query(data) { + const response = await fetch( + "https://router.huggingface.co/together/v1/completions", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({ inputs: "Can you please let us know more details about your " }).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.textGeneration({ + model: "deepseek-ai/DeepSeek-R1", + inputs: "Can you please let us know more details about your ", + provider: "together", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
+ + + +```sh +curl https://router.huggingface.co/together/v1/completions \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"Can you please let us know more details about your \"", + "model": "deepseek-ai/DeepSeek-R1" + }' +``` + + + + diff --git a/docs/api-inference/tasks/text-to-image.md b/docs/api-inference/tasks/text-to-image.md index 9f919e80e..3213c6ed2 100644 --- a/docs/api-inference/tasks/text-to-image.md +++ b/docs/api-inference/tasks/text-to-image.md @@ -24,9 +24,6 @@ For more details about the `text-to-image` task, check out its [dedicated page]( ### Recommended models -- [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev): One of the most powerful image generation models that can generate realistic outputs. -- [Kwai-Kolors/Kolors](https://huggingface.co/Kwai-Kolors/Kolors): Text-to-image model for photorealistic generation. -- [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers): A powerful text-to-image model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-to-image&sort=trending). @@ -35,8 +32,94 @@ Explore all available models and find the one that suits you best [here](https:/ + + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="fal-ai", + api_key="hf_***", +) + +# output is a PIL.Image object +image = client.text_to_image( + "Astronaut riding a horse", + model="black-forest-labs/FLUX.1-dev", +) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + + +```python +import fal_client + +result = fal_client.subscribe( + "fal-ai/flux/dev", + arguments={ + "prompt": "Astronaut riding a horse", + }, +) +print(result) +``` + + + + + + +```js +async function query(data) { + const response = await fetch( + "https://router.huggingface.co/fal-ai/fal-ai/flux/dev", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.blob(); + return result; +} + +query({ inputs: "Astronaut riding a horse" }).then((response) => { + // Use image +}); +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const image = await client.textToImage({ + provider: "fal-ai", + model: "black-forest-labs/FLUX.1-dev", + inputs: "Astronaut riding a horse", + parameters: { num_inference_steps: 5 }, +}); +/// Use the generated image (it's a Blob) +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```python from huggingface_hub import InferenceClient @@ -55,8 +138,9 @@ image = client.text_to_image( To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
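Since the Python snippets above return a `PIL.Image` object, persisting it is a one-liner. A minimal sketch, assuming the `image` variable from those snippets; the filename is illustrative:

```python
# Save the generated image to disk (any extension Pillow understands works)
image.save("astronaut_riding_a_horse.png")
```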
+ - + ```python import requests @@ -79,8 +163,9 @@ image = Image.open(io.BytesIO(image_bytes)) + - + ```js async function query(data) { const response = await fetch( @@ -105,8 +190,9 @@ query({ inputs: "Astronaut riding a horse" }).then((response) => { + - + ```js import { InferenceClient } from "@huggingface/inference"; @@ -125,6 +211,210 @@ const image = await client.textToImage({ To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="nebius", + api_key="hf_***", +) + +# output is a PIL.Image object +image = client.text_to_image( + "Astronaut riding a horse", + model="black-forest-labs/FLUX.1-dev", +) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + + +```js +async function query(data) { + const response = await fetch( + "https://router.huggingface.co/nebius/v1/images/generations", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.blob(); + return result; +} + +query({ inputs: "Astronaut riding a horse" }).then((response) => { + // Use image +}); +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const image = await client.textToImage({ + provider: "nebius", + model: "black-forest-labs/FLUX.1-dev", + inputs: "Astronaut riding a horse", + parameters: { num_inference_steps: 5 }, +}); +/// Use the generated image (it's a Blob) +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="replicate", + api_key="hf_***", +) + +# output is a PIL.Image object +image = client.text_to_image( + "Astronaut riding a horse", + model="black-forest-labs/FLUX.1-dev", +) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + + +```js +async function query(data) { + const response = await fetch( + "https://router.huggingface.co/replicate/v1/models/black-forest-labs/flux-dev/predictions", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.blob(); + return result; +} + +query({ inputs: }).then((response) => { + // Use image +}); +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const image = await client.textToImage({ + provider: "replicate", + model: "black-forest-labs/FLUX.1-dev", + inputs: "Astronaut riding a horse", + parameters: { num_inference_steps: 5 }, +}); +/// Use the generated image (it's a Blob) +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
+ + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="together", + api_key="hf_***", +) + +# output is a PIL.Image object +image = client.text_to_image( + "Astronaut riding a horse", + model="black-forest-labs/FLUX.1-dev", +) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + + + +```js +async function query(data) { + const response = await fetch( + "https://router.huggingface.co/together/v1/images/generations", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.blob(); + return result; +} + +query({ inputs: "Astronaut riding a horse" }).then((response) => { + // Use image +}); +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const image = await client.textToImage({ + provider: "together", + model: "black-forest-labs/FLUX.1-dev", + inputs: "Astronaut riding a horse", + parameters: { num_inference_steps: 5 }, +}); +/// Use the generated image (it's a Blob) +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + diff --git a/docs/api-inference/tasks/token-classification.md b/docs/api-inference/tasks/token-classification.md index 1a8bac740..296c9f139 100644 --- a/docs/api-inference/tasks/token-classification.md +++ b/docs/api-inference/tasks/token-classification.md @@ -24,10 +24,6 @@ For more details about the `token-classification` task, check out its [dedicated ### Recommended models -- [dslim/bert-base-NER](https://huggingface.co/dslim/bert-base-NER): A robust performance model to identify people, locations, organizations and names of miscellaneous entities. -- [FacebookAI/xlm-roberta-large-finetuned-conll03-english](https://huggingface.co/FacebookAI/xlm-roberta-large-finetuned-conll03-english): A strong model to identify people, locations, organizations and names in multiple languages. -- [blaze999/Medical-NER](https://huggingface.co/blaze999/Medical-NER): A token classification model specialized on medical entity recognition. -- [flair/ner-english](https://huggingface.co/flair/ner-english): Flair models are typically the state of the art in named entity recognition tasks. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=token-classification&sort=trending). @@ -36,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -48,19 +45,20 @@ client = InferenceClient( result = client.token_classification( inputs="My name is Sarah Jessica Parker but you can call me Jessica", - model="dslim/bert-base-NER", + model="FacebookAI/xlm-roberta-large-finetuned-conll03-english", ) ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
+ - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER" +API_URL = "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-large-finetuned-conll03-english" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -74,12 +72,13 @@ output = query({ + - + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER", + "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-large-finetuned-conll03-english", { headers: { Authorization: "Bearer hf_***", @@ -100,15 +99,16 @@ query({ inputs: "My name is Sarah Jessica Parker but you can call me Jessica" }) + - + ```js import { InferenceClient } from "@huggingface/inference"; const client = new InferenceClient("hf_***"); const output = await client.tokenClassification({ - model: "dslim/bert-base-NER", + model: "FacebookAI/xlm-roberta-large-finetuned-conll03-english", inputs: "My name is Sarah Jessica Parker but you can call me Jessica", provider: "hf-inference", }); @@ -119,10 +119,11 @@ console.log(output); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```sh -curl https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER \ +curl https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-large-finetuned-conll03-english \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ @@ -168,9 +169,6 @@ For more information about Inference API headers, check out the parameters [guid #### Response -Output type depends on the `stream` input parameter. -If `stream` is `false` (default), the response will be a JSON object with the following fields: - | Body | | | :--- | :--- | :--- | | **(array)** | _object[]_ | Output is an array of objects. | @@ -182,8 +180,3 @@ If `stream` is `false` (default), the response will be a JSON object with the fo | **        end** | _integer_ | The character position in the input where this group ends. | -If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). -For more information about streaming, check out [this guide](https://huggingface.co/docs/token-classification-inference/conceptual/streaming). - - - diff --git a/docs/api-inference/tasks/translation.md b/docs/api-inference/tasks/translation.md index 8db4e2845..ca5572f92 100644 --- a/docs/api-inference/tasks/translation.md +++ b/docs/api-inference/tasks/translation.md @@ -24,8 +24,6 @@ For more details about the `translation` task, check out its [dedicated page](ht ### Recommended models -- [facebook/nllb-200-1.3B](https://huggingface.co/facebook/nllb-200-1.3B): Very powerful model that can translate many languages between each other, especially low-resource languages. -- [google-t5/t5-base](https://huggingface.co/google-t5/t5-base): A general-purpose Transformer that can be used to translate from English to German, French, or Romanian. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=translation&sort=trending). 
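Multilingual checkpoints such as mBART-50 or NLLB usually need source and target language codes; a hedged sketch of passing them through the `parameters` object of the raw HTTP API, assuming the deployed pipeline honours the `src_lang`/`tgt_lang` fields from the translation specification and that `ru_RU`/`en_XX` are valid codes for the chosen model:

```python
import requests

API_URL = "https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt"
headers = {"Authorization": "Bearer hf_***"}

def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# Source and target languages ride along as extra parameters next to the input text
output = query({
    "inputs": "Меня зовут Вольфганг и я живу в Берлине",
    "parameters": {"src_lang": "ru_RU", "tgt_lang": "en_XX"},
})
print(output[0]["translation_text"])
```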
@@ -34,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python from huggingface_hub import InferenceClient @@ -46,19 +45,20 @@ client = InferenceClient( result = client.translation( inputs="Меня зовут Вольфганг и я живу в Берлине", - model="facebook/nllb-200-1.3B", + model="facebook/mbart-large-50-many-to-many-mmt", ) ``` To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). + - + ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B" +API_URL = "https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -72,12 +72,13 @@ output = query({ + - + ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B", + "https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt", { headers: { Authorization: "Bearer hf_***", @@ -98,15 +99,16 @@ query({ inputs: "Меня зовут Вольфганг и я живу в Бер + - + ```js import { InferenceClient } from "@huggingface/inference"; const client = new InferenceClient("hf_***"); const output = await client.translation({ - model: "facebook/nllb-200-1.3B", + model: "facebook/mbart-large-50-many-to-many-mmt", inputs: "Меня зовут Вольфганг и я живу в Берлине", provider: "hf-inference", }); @@ -117,10 +119,11 @@ console.log(output); To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + - + ```sh -curl https://router.huggingface.co/hf-inference/models/facebook/nllb-200-1.3B \ +curl https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ diff --git a/docs/api-inference/tasks/zero-shot-classification.md b/docs/api-inference/tasks/zero-shot-classification.md index b497ee503..619ddf3cc 100644 --- a/docs/api-inference/tasks/zero-shot-classification.md +++ b/docs/api-inference/tasks/zero-shot-classification.md @@ -24,7 +24,6 @@ For more details about the `zero-shot-classification` task, check out its [dedic ### Recommended models -- [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli): Powerful zero-shot text classification model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=zero-shot-classification&sort=trending). 
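Unlike the other text tasks, zero-shot classification needs candidate labels in addition to the input text; a hedged sketch of the raw HTTP payload, using hypothetical labels (`refund`, `legal`, `faq`) and assuming the usual `labels`/`scores` response shape:

```python
import requests

API_URL = "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli"
headers = {"Authorization": "Bearer hf_***"}

def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# Candidate labels go in `parameters`; the model scores the input against each of them
output = query({
    "inputs": "Hi, I recently bought a device from your company but it is not working as advertised.",
    "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
})
print(list(zip(output["labels"], output["scores"])))
```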
@@ -33,8 +32,9 @@ Explore all available models and find the one that suits you best [here](https:/ + - + ```python import requests @@ -53,8 +53,9 @@ output = query({ + - + ```js async function query(data) { const response = await fetch( @@ -82,8 +83,9 @@ query({ + - + ```sh curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli \ -X POST \ diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index be80ccf09..36c7d7e4d 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -1,4 +1,8 @@ -import { snippets } from "@huggingface/inference"; +import { + snippets, + INFERENCE_PROVIDERS, + InferenceProvider, +} from "@huggingface/inference"; import { PipelineType, InferenceSnippet, @@ -32,8 +36,22 @@ const TASKS: PipelineType[] = [ const TASKS_EXTENDED = [...TASKS, "chat-completion"]; const SPECS_REVISION = "main"; -const inferenceSnippetLanguages = ["python", "js", "sh"] as const; -type InferenceSnippetLanguage = (typeof inferenceSnippetLanguages)[number]; +const HEADERS = { Authorization: `Bearer ${process.env.HF_TOKEN}` }; + +async function authFetchJson(url: string) { + const headers = url.includes("huggingface.co") ? HEADERS : {}; + try { + const res = await fetch(url, { headers: headers }); + if (!res.ok) { + console.warn(`Failed to fetch ${url}: ${await res.text()}`); + return {}; + } + return res.json(); + } catch (e) { + console.warn(`Failed to fetch ${url}: ${e}`); + return {}; + } +} // Taken from https://stackoverflow.com/a/31632215 Handlebars.registerHelper({ @@ -53,6 +71,39 @@ Handlebars.registerHelper({ console.log("🛠️ Preparing..."); +//////////////////////// +//// Provider utils //// +//////////////////////// + +/// PER_TASK_SUPPORTED_PROVIDERS[task] = provider[] +const PER_TASK_SUPPORTED_PROVIDERS: Record = {}; + +await Promise.all( + INFERENCE_PROVIDERS.map(async (provider) => { + if (provider == "hf-inference") { + return; // handled separately + } + console.log(" ⚡ Fetching supported tasks for provider " + provider); + const url = `https://huggingface.co/api/partners/${provider}/models`; + const mapping = (await authFetchJson(url)) as Record< + string, + Record + >; + + for (const [task, models] of Object.entries(mapping)) { + for (const [modelId, modelMapping] of Object.entries(models)) { + if (modelMapping.status == "live") { + if (!PER_TASK_SUPPORTED_PROVIDERS[task]) { + PER_TASK_SUPPORTED_PROVIDERS[task] = []; + } + PER_TASK_SUPPORTED_PROVIDERS[task].push(provider); + break; + } + } + } + }) +); + //////////////////////// //// Filepath utils //// //////////////////////// @@ -98,35 +149,39 @@ function writeTaskDoc(templateName: string, content: string): Promise { const TASKS_API_URL = "https://huggingface.co/api/tasks"; console.log(` 🕸️ Fetching ${TASKS_API_URL}`); -const response = await fetch(TASKS_API_URL); // eslint-disable-next-line @typescript-eslint/no-explicit-any -const TASKS_DATA = (await response.json()) as any; +const TASKS_DATA = (await authFetchJson(TASKS_API_URL)) as any; /////////////////////// //// Snippet utils //// /////////////////////// export function getFormattedInferenceSnippet( - id: string, pipeline_tag: PipelineType, - config?: JsonObject, - tags?: string[] + model: { + modelId: string; + provider: string; + providerModelId: string; + providerTask: string; + tags: string[]; + }, + conversational: boolean ): InferenceSnippet[] { - const modelData = { - id, - pipeline_tag, - mask_token: "[MASK]", - library_name: "", - config: 
config ?? {}, - tags: tags ?? [], - inference: "", - } as ModelDataMinimal; - // @ts-ignore + if (conversational && !model.tags.includes("conversational")) { + return []; + } return snippets.getInferenceSnippets( - modelData, + { + id: model.modelId, + pipeline_tag, + mask_token: "[MASK]", + library_name: "", + tags: conversational ? ["conversational"] : [], + inference: "", + } as ModelDataMinimal, "hf_***", - "hf-inference", - modelData.id + model.provider as InferenceProvider, + model.providerModelId ); } @@ -147,9 +202,7 @@ async function fetchOneSpec( ): Promise { const url = SPECS_URL_TEMPLATE({ task, name }); console.log(` 🕸️ Fetching ${task} ${name} specs`); - return fetch(url) - .then((res) => res.json()) - .catch(() => undefined); + return (await authFetchJson(url)) ?? undefined; } async function fetchSpecs( @@ -166,7 +219,7 @@ async function fetchSpecs( async function fetchCommonDefinitions(): Promise { console.log(` 🕸️ Fetching common definitions`); - return fetch(COMMON_DEFINITIONS_URL).then((res) => res.json()); + return await authFetchJson(COMMON_DEFINITIONS_URL); } const COMMON_DEFINITIONS = await fetchCommonDefinitions(); @@ -347,7 +400,17 @@ const DATA: { id: string; description: string; inference: string | undefined; - config: JsonObject | undefined; + tags: string[]; + }[] + >; + perProviderWarmModels: Record< + string, + { + modelId: string; + provider: string; + providerModelId: string; + providerTask: string; + tags: string[]; }[] >; snippets: Record; @@ -368,12 +431,13 @@ const DATA: { specsHeaders: SPECS_HEADERS, }, recommendedModels: {}, + perProviderWarmModels: {}, snippets: {}, specs: {}, tips: { linksToTaskPage: {}, listModelsLink: {} }, }; -// Check for each model if inference status is "warm" +// For each task, retrieve inference status of all recommended models await Promise.all( TASKS.map(async (task) => { await Promise.all( @@ -382,39 +446,101 @@ await Promise.all( id: string; description: string; inference: string | undefined; - config: JsonObject | undefined; }) => { console.log(` ⚡ Checking inference status ${model.id}`); - let url = `https://huggingface.co/api/models/${model.id}?expand[]=inference`; - if (task === "text-generation" || task === "image-text-to-text") { - url += "&expand[]=config"; - } - const modelData = await fetch(url).then((res) => res.json()); + let url = `https://huggingface.co/api/models/${model.id}?expand[]=inference&expand[]=tags`; + const modelData = await authFetchJson(url); model.inference = modelData.inference; - model.config = modelData.config; } ) ); }) ); -// Fetch recommended models +async function fetchWarmModels(task: PipelineType): Promise< + { + modelId: string; + provider: string; + providerModelId: string; + providerTask: string; + tags: string[]; + }[] +> { + const providers = [ + "hf-inference", + ...(PER_TASK_SUPPORTED_PROVIDERS[task] ?? 
[]), + ].sort(); + return ( + await Promise.all( + providers.map(async (provider) => { + console.log( + ` ⚡ Fetching most popular warm model for ${task} from ${provider}` + ); + const url = `https://huggingface.co/api/models?pipeline_tag=${task}&inference_provider=${provider}&sort=likes30d&expand[]=inferenceProviderMapping&expand[]=tags&limit=5`; + const modelsData = (await authFetchJson(url)) as { + id: string; + likes30d: number; + inferenceProviderMapping: Record[]; + tags: string[]; + }[]; + if (modelsData.length === 0) { + return; + } + + /// Little hack: if there are multiple models with the same number of likes (typically 0), we arbitrarily pick the one with the smallest ID to get a deterministic result + const topLikes = modelsData[0].likes30d; + const topModelData = modelsData + .filter((model) => model.likes30d === topLikes) + .sort((a, b) => a.id.localeCompare(b.id))[0]; + + const providerMapping = topModelData.inferenceProviderMapping as + | Record[] + | undefined; + if (!providerMapping) { + return; + } + const providerData = providerMapping.filter( + (mapping) => mapping.provider === provider + )[0]; + return { + modelId: topModelData.id, + provider: provider, + providerModelId: providerData.providerId, + providerTask: providerData.task, + tags: topModelData.tags, + }; + }) + ) + ).filter((model) => model !== undefined); +} + +// For each task and for each provider, retrieve the most popular warm model +await Promise.all( + TASKS.map(async (task) => { + DATA.perProviderWarmModels[task] = await fetchWarmModels(task); + }) +); + +// Filter recommended models (i.e. recommended + warm) TASKS.forEach((task) => { DATA.recommendedModels[task] = TASKS_DATA[task].models.filter( - (model: { inference: string }) => - ["cold", "loading", "warm"].includes(model.inference) + (model: { inference: string }) => model.inference === "warm" ); }); -// Fetch snippets -// TODO: render snippets only if they are available +// Generate snippets TASKS.forEach((task) => { - // Let's take as example the first available model that is recommended. - // Otherwise, fallback to "". - const mainModel = DATA.recommendedModels[task][0]?.id ?? 
""; - const taskSnippets = getFormattedInferenceSnippet(mainModel, task); + const inferenceSnippets = DATA.perProviderWarmModels[task].flatMap((model) => + getFormattedInferenceSnippet(task, model, false).map( + (inferenceSnippet) => ({ + ...inferenceSnippet, + provider: model.provider, + }) + ) + ); + DATA.snippets[task] = SNIPPETS_TEMPLATE({ - taskSnippets, + inferenceSnippets, taskSnakeCase: task.replaceAll("-", "_"), taskAttached: task.replaceAll("-", ""), }); @@ -451,47 +577,52 @@ TASKS.forEach((task) => { //// Data for chat-completion special case //// /////////////////////////////////////////////// -function fetchChatCompletion() { - const baseName = "chat-completion"; - const conversationalTasks = [ - { - name: "chat-completion", - baseName: "text-generation", - pipelineTag: "text-generation", - }, - { - name: "conversational-image-text-to-text", - baseName: "image-text-to-text", - pipelineTag: "image-text-to-text", - }, - ]; - - conversationalTasks.forEach((task) => { - // Recommended models based on the base task - DATA.recommendedModels[task.name] = DATA.recommendedModels[ - task.baseName - ].filter( - // @ts-ignore - (model) => model.config?.tokenizer_config?.chat_template - ); - - const mainModel = DATA.recommendedModels[task.name][0]; +async function fetchChatCompletion() { + // Conversational text-generation + console.log( + " ⚡ Prepare data for chat-completion (conversational text-generation)" + ); + DATA.recommendedModels["chat-completion"] = DATA.recommendedModels[ + "text-generation" + ].filter((model) => model.tags?.includes("conversational")); + DATA.snippets["chat-completion"] = SNIPPETS_TEMPLATE({ + taskSnakeCase: "chat_completion", + taskAttached: "chatCompletion", + inferenceSnippets: (await fetchWarmModels("text-generation")).flatMap( + (model) => + getFormattedInferenceSnippet("text-generation", model, true).map( + (inferenceSnippet) => ({ + ...inferenceSnippet, + provider: model.provider, + }) + ) + ), + }); - const taskSnippets = getFormattedInferenceSnippet( - mainModel.id, - task.pipelineTag as PipelineType, - mainModel.config, - ["conversational"] + // Conversational image-text-to-text + console.log( + " ⚡ Prepare data for chat-completion (conversational image-text-to-text)" + ); + DATA.recommendedModels["conversational-image-text-to-text"] = + DATA.recommendedModels["image-text-to-text"].filter((model) => + model.tags?.includes("conversational") ); - DATA.snippets[task.name] = SNIPPETS_TEMPLATE({ - taskSnippets, - taskSnakeCase: baseName.replaceAll("-", "_"), - taskAttached: baseName.replaceAll("-", ""), - }); + DATA.snippets["conversational-image-text-to-text"] = SNIPPETS_TEMPLATE({ + taskSnakeCase: "chat_completion", + taskAttached: "chatCompletion", + inferenceSnippets: (await fetchWarmModels("image-text-to-text")).flatMap( + (model) => + getFormattedInferenceSnippet("image-text-to-text", model, true).map( + (inferenceSnippet) => ({ + ...inferenceSnippet, + provider: model.provider, + }) + ) + ), }); } -fetchChatCompletion(); +await fetchChatCompletion(); ///////////////////////// //// Rendering utils //// diff --git a/scripts/api-inference/templates/common/snippets-template.handlebars b/scripts/api-inference/templates/common/snippets-template.handlebars index 96c382ef5..2f974071d 100644 --- a/scripts/api-inference/templates/common/snippets-template.handlebars +++ b/scripts/api-inference/templates/common/snippets-template.handlebars @@ -1,9 +1,10 @@ -{{#if taskSnippets.length }} +{{#if inferenceSnippets.length }} -{{#each taskSnippets}} - +{{#each 
inferenceSnippets}} + + ```{{this.language}} {{{this.content}}} diff --git a/scripts/api-inference/templates/task/image-text-to-text.handlebars b/scripts/api-inference/templates/task/image-text-to-text.handlebars index 8aa03f37e..7f2554d78 100644 --- a/scripts/api-inference/templates/task/image-text-to-text.handlebars +++ b/scripts/api-inference/templates/task/image-text-to-text.handlebars @@ -6,7 +6,7 @@ Image-text-to-text models take in an image and text prompt and output text. Thes ### Recommended models -{{#each models.image-text-to-text}} +{{#each models.conversational-image-text-to-text}} - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} {{/each}} @@ -14,7 +14,7 @@ Image-text-to-text models take in an image and text prompt and output text. Thes ### Using the API -{{{snippets.image-text-to-text}}} +{{{snippets.conversational-image-text-to-text}}} ### API specification diff --git a/scripts/api-inference/templates/task/token-classification.handlebars b/scripts/api-inference/templates/task/token-classification.handlebars index 4a627783f..9045de0ba 100644 --- a/scripts/api-inference/templates/task/token-classification.handlebars +++ b/scripts/api-inference/templates/task/token-classification.handlebars @@ -26,13 +26,5 @@ Token classification is a task in which a label is assigned to some tokens in a #### Response -Output type depends on the `stream` input parameter. -If `stream` is `false` (default), the response will be a JSON object with the following fields: - {{{specs.token-classification.output}}} -If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). -For more information about streaming, check out [this guide](https://huggingface.co/docs/token-classification-inference/conceptual/streaming). 
- -{{{specs.token-classification.stream_output}}} - From 111eca8186af2fdf8f86cd60d372b87faa398d3e Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 21 Mar 2025 15:50:04 +0100 Subject: [PATCH 3/6] update --- .../api-inference/tasks/feature-extraction.md | 10 +- docs/api-inference/tasks/fill-mask.md | 107 +++++++++++++++++- .../tasks/token-classification.md | 10 +- docs/api-inference/tasks/translation.md | 10 +- 4 files changed, 121 insertions(+), 16 deletions(-) diff --git a/docs/api-inference/tasks/feature-extraction.md b/docs/api-inference/tasks/feature-extraction.md index b246abe87..8c411637a 100644 --- a/docs/api-inference/tasks/feature-extraction.md +++ b/docs/api-inference/tasks/feature-extraction.md @@ -50,7 +50,7 @@ client = InferenceClient( result = client.feature_extraction( inputs="Today is a sunny day and I will get some ice cream.", - model="mixedbread-ai/mxbai-embed-large-v1", + model="intfloat/multilingual-e5-large-instruct", ) ``` @@ -63,7 +63,7 @@ To use the Python `InferenceClient`, see the [package reference](https://hugging ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/mixedbread-ai/mxbai-embed-large-v1" +API_URL = "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/intfloat/multilingual-e5-large-instruct" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -83,7 +83,7 @@ output = query({ ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/mixedbread-ai/mxbai-embed-large-v1", + "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/intfloat/multilingual-e5-large-instruct", { headers: { Authorization: "Bearer hf_***", @@ -113,7 +113,7 @@ import { InferenceClient } from "@huggingface/inference"; const client = new InferenceClient("hf_***"); const output = await client.featureExtraction({ - model: "mixedbread-ai/mxbai-embed-large-v1", + model: "intfloat/multilingual-e5-large-instruct", inputs: "Today is a sunny day and I will get some ice cream.", provider: "hf-inference", }); @@ -128,7 +128,7 @@ To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package referen ```sh -curl https://router.huggingface.co/hf-inference/pipeline/feature-extraction/mixedbread-ai/mxbai-embed-large-v1 \ +curl https://router.huggingface.co/hf-inference/pipeline/feature-extraction/intfloat/multilingual-e5-large-instruct \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md index 5eac91ac9..ffdd7ff2c 100644 --- a/docs/api-inference/tasks/fill-mask.md +++ b/docs/api-inference/tasks/fill-mask.md @@ -30,7 +30,112 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API -No snippet available for this task. + + + + + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient( + provider="hf-inference", + api_key="hf_***", +) + +result = client.fill_mask( + inputs="The answer to the universe is [MASK].", + model="google-bert/bert-base-multilingual-cased", +) +``` + + + +To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
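The fill-mask call returns one candidate per likely token; a hedged sketch of reading the predictions with the Python `InferenceClient`, assuming each element exposes the `score` and `sequence` fields from the fill-mask response specification:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key="hf_***",
)

result = client.fill_mask(
    inputs="The answer to the universe is [MASK].",
    model="google-bert/bert-base-multilingual-cased",
)

# Candidates are ordered by probability; `sequence` is the input with [MASK] filled in
for candidate in result:
    print(round(candidate.score, 3), candidate.sequence)
```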
+ + + +```python +import requests + +API_URL = "https://router.huggingface.co/hf-inference/models/google-bert/bert-base-multilingual-cased" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "The answer to the universe is [MASK].", +}) +``` + + + + + + +```js +async function query(data) { + const response = await fetch( + "https://router.huggingface.co/hf-inference/models/google-bert/bert-base-multilingual-cased", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({ inputs: "The answer to the universe is [MASK]." }).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + + + + + + +```js +import { InferenceClient } from "@huggingface/inference"; + +const client = new InferenceClient("hf_***"); + +const output = await client.fillMask({ + model: "google-bert/bert-base-multilingual-cased", + inputs: "The answer to the universe is [MASK].", + provider: "hf-inference", +}); + +console.log(output); +``` + + + +To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). + + + +```sh +curl https://router.huggingface.co/hf-inference/models/google-bert/bert-base-multilingual-cased \ + -X POST \ + -H 'Authorization: Bearer hf_***' \ + -H 'Content-Type: application/json' \ + -d '{ + "inputs": "\"The answer to the universe is [MASK].\"" + }' +``` + + + + + diff --git a/docs/api-inference/tasks/token-classification.md b/docs/api-inference/tasks/token-classification.md index 296c9f139..fbff55813 100644 --- a/docs/api-inference/tasks/token-classification.md +++ b/docs/api-inference/tasks/token-classification.md @@ -45,7 +45,7 @@ client = InferenceClient( result = client.token_classification( inputs="My name is Sarah Jessica Parker but you can call me Jessica", - model="FacebookAI/xlm-roberta-large-finetuned-conll03-english", + model="dslim/bert-base-NER", ) ``` @@ -58,7 +58,7 @@ To use the Python `InferenceClient`, see the [package reference](https://hugging ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-large-finetuned-conll03-english" +API_URL = "https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -78,7 +78,7 @@ output = query({ ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-large-finetuned-conll03-english", + "https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER", { headers: { Authorization: "Bearer hf_***", @@ -108,7 +108,7 @@ import { InferenceClient } from "@huggingface/inference"; const client = new InferenceClient("hf_***"); const output = await client.tokenClassification({ - model: "FacebookAI/xlm-roberta-large-finetuned-conll03-english", + model: "dslim/bert-base-NER", inputs: "My name is Sarah Jessica Parker but you can call me Jessica", provider: "hf-inference", }); @@ -123,7 +123,7 @@ To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package referen ```sh -curl https://router.huggingface.co/hf-inference/models/FacebookAI/xlm-roberta-large-finetuned-conll03-english \ +curl 
https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ diff --git a/docs/api-inference/tasks/translation.md b/docs/api-inference/tasks/translation.md index ca5572f92..e65b1c185 100644 --- a/docs/api-inference/tasks/translation.md +++ b/docs/api-inference/tasks/translation.md @@ -45,7 +45,7 @@ client = InferenceClient( result = client.translation( inputs="Меня зовут Вольфганг и я живу в Берлине", - model="facebook/mbart-large-50-many-to-many-mmt", + model="facebook/nllb-200-distilled-600M", ) ``` @@ -58,7 +58,7 @@ To use the Python `InferenceClient`, see the [package reference](https://hugging ```python import requests -API_URL = "https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt" +API_URL = "https://router.huggingface.co/hf-inference/models/facebook/nllb-200-distilled-600M" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -78,7 +78,7 @@ output = query({ ```js async function query(data) { const response = await fetch( - "https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt", + "https://router.huggingface.co/hf-inference/models/facebook/nllb-200-distilled-600M", { headers: { Authorization: "Bearer hf_***", @@ -108,7 +108,7 @@ import { InferenceClient } from "@huggingface/inference"; const client = new InferenceClient("hf_***"); const output = await client.translation({ - model: "facebook/mbart-large-50-many-to-many-mmt", + model: "facebook/nllb-200-distilled-600M", inputs: "Меня зовут Вольфганг и я живу в Берлине", provider: "hf-inference", }); @@ -123,7 +123,7 @@ To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package referen ```sh -curl https://router.huggingface.co/hf-inference/models/facebook/mbart-large-50-many-to-many-mmt \ +curl https://router.huggingface.co/hf-inference/models/facebook/nllb-200-distilled-600M \ -X POST \ -H 'Authorization: Bearer hf_***' \ -H 'Content-Type: application/json' \ From cf5d962aa4d97624ff17d228eeed81067e20bfa0 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Wed, 2 Apr 2025 18:06:20 +0200 Subject: [PATCH 4/6] big step --- .../tasks/automatic-speech-recognition.md | 205 +-------- .../tasks/feature-extraction.md | 109 +---- docs/inference-providers/tasks/fill-mask.md | 109 +---- .../tasks/image-classification.md | 106 +---- .../tasks/image-segmentation.md | 84 +--- .../tasks/image-to-image.md | 61 +-- .../tasks/object-detection.md | 84 +--- .../tasks/question-answering.md | 121 +----- .../tasks/summarization.md | 109 +---- .../tasks/text-classification.md | 109 +---- .../tasks/text-generation.md | 212 +--------- .../tasks/text-to-image.md | 389 +----------------- .../tasks/token-classification.md | 109 +---- docs/inference-providers/tasks/translation.md | 109 +---- .../tasks/zero-shot-classification.md | 71 +--- .../inference-providers/scripts/generate.ts | 117 +++--- .../common/snippets-template.handlebars | 28 +- .../task/image-text-to-text.handlebars | 23 -- 18 files changed, 114 insertions(+), 2041 deletions(-) delete mode 100644 scripts/inference-providers/templates/task/image-text-to-text.handlebars diff --git a/docs/inference-providers/tasks/automatic-speech-recognition.md b/docs/inference-providers/tasks/automatic-speech-recognition.md index a45d9f757..78cedd284 100644 --- a/docs/inference-providers/tasks/automatic-speech-recognition.md +++ b/docs/inference-providers/tasks/automatic-speech-recognition.md @@ -35,208 +35,11 @@ Explore all 
available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="fal-ai", - api_key="hf_***", -) - -output = client.automatic_speech_recognition("sample1.flac", model="openai/whisper-large-v3") -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/fal-ai/fal-ai/whisper" -headers = {"Authorization": "Bearer hf_***"} - -def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers={"Content-Type": "audio/flac", **headers}, data=data) - return response.json() - -output = query("sample1.flac") -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/fal-ai/fal-ai/whisper", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "audio/flac" - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "sample1.flac" }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const data = fs.readFileSync("sample1.flac"); - -const output = await client.automaticSpeechRecognition({ - data, - model: "openai/whisper-large-v3", - provider: "fal-ai", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/fal-ai/fal-ai/whisper \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: audio/flac' \ - --data-binary @"sample1.flac" -``` - - - - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -output = client.automatic_speech_recognition("sample1.flac", model="openai/whisper-large-v3-turbo") -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo" -headers = {"Authorization": "Bearer hf_***"} - -def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers={"Content-Type": "audio/flac", **headers}, data=data) - return response.json() - -output = query("sample1.flac") -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "audio/flac" - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "sample1.flac" }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const data = fs.readFileSync("sample1.flac"); - -const output = await client.automaticSpeechRecognition({ - data, - model: "openai/whisper-large-v3-turbo", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3-turbo \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: audio/flac' \ - --data-binary @"sample1.flac" -``` - - - - - + providersMapping={ {"fal-ai":{"modelId":"openai/whisper-large-v3","providerModelId":"fal-ai/whisper"},"hf-inference":{"modelId":"openai/whisper-large-v3-turbo","providerModelId":"openai/whisper-large-v3-turbo"}} } +/> diff --git a/docs/inference-providers/tasks/feature-extraction.md b/docs/inference-providers/tasks/feature-extraction.md index df3ec3823..de1d85461 100644 --- a/docs/inference-providers/tasks/feature-extraction.md +++ b/docs/inference-providers/tasks/feature-extraction.md @@ -35,112 +35,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.feature_extraction( - inputs="Today is a sunny day and I will get some ice cream.", - model="intfloat/multilingual-e5-large-instruct", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/intfloat/multilingual-e5-large-instruct" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "Today is a sunny day and I will get some ice cream.", -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/pipeline/feature-extraction/intfloat/multilingual-e5-large-instruct", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "Today is a sunny day and I will get some ice cream." }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.featureExtraction({ - model: "intfloat/multilingual-e5-large-instruct", - inputs: "Today is a sunny day and I will get some ice cream.", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/pipeline/feature-extraction/intfloat/multilingual-e5-large-instruct \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "\"Today is a sunny day and I will get some ice cream.\"" - }' -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"mixedbread-ai/mxbai-embed-large-v1","providerModelId":"mixedbread-ai/mxbai-embed-large-v1"},"sambanova":{"modelId":"intfloat/e5-mistral-7b-instruct","providerModelId":"E5-Mistral-7B-Instruct"}} } +/> diff --git a/docs/inference-providers/tasks/fill-mask.md b/docs/inference-providers/tasks/fill-mask.md index cc1674a50..f23053874 100644 --- a/docs/inference-providers/tasks/fill-mask.md +++ b/docs/inference-providers/tasks/fill-mask.md @@ -30,112 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.fill_mask( - inputs="The answer to the universe is [MASK].", - model="google-bert/bert-base-multilingual-cased", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/google-bert/bert-base-multilingual-cased" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "The answer to the universe is [MASK].", -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/google-bert/bert-base-multilingual-cased", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "The answer to the universe is [MASK]." }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.fillMask({ - model: "google-bert/bert-base-multilingual-cased", - inputs: "The answer to the universe is [MASK].", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/models/google-bert/bert-base-multilingual-cased \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "\"The answer to the universe is [MASK].\"" - }' -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"google-bert/bert-base-multilingual-cased","providerModelId":"google-bert/bert-base-multilingual-cased"}} } +/> diff --git a/docs/inference-providers/tasks/image-classification.md b/docs/inference-providers/tasks/image-classification.md index 9718341e6..ec999d3db 100644 --- a/docs/inference-providers/tasks/image-classification.md +++ b/docs/inference-providers/tasks/image-classification.md @@ -30,109 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -output = client.image_classification("cats.jpg", model="Falconsai/nsfw_image_detection") -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection" -headers = {"Authorization": "Bearer hf_***"} - -def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers={"Content-Type": "image/jpeg", **headers}, data=data) - return response.json() - -output = query("cats.jpg") -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "image/jpeg" - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "cats.jpg" }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const data = fs.readFileSync("cats.jpg"); - -const output = await client.imageClassification({ - data, - model: "Falconsai/nsfw_image_detection", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/models/Falconsai/nsfw_image_detection \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: image/jpeg' \ - --data-binary @"cats.jpg" -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"Falconsai/nsfw_image_detection","providerModelId":"Falconsai/nsfw_image_detection"}} } +/> diff --git a/docs/inference-providers/tasks/image-segmentation.md b/docs/inference-providers/tasks/image-segmentation.md index 8dcae964c..df1510e4f 100644 --- a/docs/inference-providers/tasks/image-segmentation.md +++ b/docs/inference-providers/tasks/image-segmentation.md @@ -30,87 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -output = client.image_segmentation("cats.jpg", model="jonathandinu/face-parsing") -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing" -headers = {"Authorization": "Bearer hf_***"} - -def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers={"Content-Type": "image/jpeg", **headers}, data=data) - return response.json() - -output = query("cats.jpg") -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "image/jpeg" - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "cats.jpg" }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```sh -curl https://router.huggingface.co/hf-inference/models/jonathandinu/face-parsing \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: image/jpeg' \ - --data-binary @"cats.jpg" -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"jonathandinu/face-parsing","providerModelId":"jonathandinu/face-parsing"}} } +/> diff --git a/docs/inference-providers/tasks/image-to-image.md b/docs/inference-providers/tasks/image-to-image.md index 07cd68c15..fa29b62f5 100644 --- a/docs/inference-providers/tasks/image-to-image.md +++ b/docs/inference-providers/tasks/image-to-image.md @@ -35,64 +35,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -# output is a PIL.Image object -image = client.image_to_image( - "cat.png", - prompt="Turn the cat into a tiger.", - model="stabilityai/stable-diffusion-xl-refiner-1.0", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import base64 -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/stabilityai/stable-diffusion-xl-refiner-1.0" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - with open(payload["inputs"], "rb") as f: - img = f.read() - payload["inputs"] = base64.b64encode(img).decode("utf-8") - response = requests.post(API_URL, headers=headers, json=payload) - return response.content - -image_bytes = query({ - "inputs": "cat.png", - "parameters": { - "prompt": "Turn the cat into a tiger." 
- } -}) - -# You can access the image with PIL.Image for example -import io -from PIL import Image -image = Image.open(io.BytesIO(image_bytes)) -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"enhanceaiteam/Flux-Uncensored-V2","providerModelId":"black-forest-labs/FLUX.1-dev"}} } +/> diff --git a/docs/inference-providers/tasks/object-detection.md b/docs/inference-providers/tasks/object-detection.md index 5eb2ba973..a96fbe2dc 100644 --- a/docs/inference-providers/tasks/object-detection.md +++ b/docs/inference-providers/tasks/object-detection.md @@ -30,87 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -output = client.object_detection("cats.jpg", model="facebook/detr-resnet-50") -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50" -headers = {"Authorization": "Bearer hf_***"} - -def query(filename): - with open(filename, "rb") as f: - data = f.read() - response = requests.post(API_URL, headers={"Content-Type": "image/jpeg", **headers}, data=data) - return response.json() - -output = query("cats.jpg") -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "image/jpeg" - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "cats.jpg" }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```sh -curl https://router.huggingface.co/hf-inference/models/facebook/detr-resnet-50 \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: image/jpeg' \ - --data-binary @"cats.jpg" -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"facebook/detr-resnet-50","providerModelId":"facebook/detr-resnet-50"}} } +/> diff --git a/docs/inference-providers/tasks/question-answering.md b/docs/inference-providers/tasks/question-answering.md index 1bc1a0afd..43db30cf4 100644 --- a/docs/inference-providers/tasks/question-answering.md +++ b/docs/inference-providers/tasks/question-answering.md @@ -30,124 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.question_answering( - inputs={ - "question": "What is my name?", - "context": "My name is Clara and I live in Berkeley." -}, - model="distilbert/distilbert-base-cased-distilled-squad", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-cased-distilled-squad" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": { - "question": "What is my name?", - "context": "My name is Clara and I live in Berkeley." -}, -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-cased-distilled-squad", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: { - "question": "What is my name?", - "context": "My name is Clara and I live in Berkeley." -} }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.questionAnswering({ - model: "distilbert/distilbert-base-cased-distilled-squad", - inputs: { - "question": "What is my name?", - "context": "My name is Clara and I live in Berkeley." -}, - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/models/distilbert/distilbert-base-cased-distilled-squad \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "{\n\t\"question\": \"What is my name?\",\n\t\"context\": \"My name is Clara and I live in Berkeley.\"\n}" - }' -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"distilbert/distilbert-base-cased-distilled-squad","providerModelId":"distilbert/distilbert-base-cased-distilled-squad"}} } +/> diff --git a/docs/inference-providers/tasks/summarization.md b/docs/inference-providers/tasks/summarization.md index 911a15f21..f1296b139 100644 --- a/docs/inference-providers/tasks/summarization.md +++ b/docs/inference-providers/tasks/summarization.md @@ -30,112 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.summarization( - inputs="The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). 
Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", - model="facebook/bart-large-cnn", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct." }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.summarization({ - model: "facebook/bart-large-cnn", - inputs: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). 
Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "\"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.\"" - }' -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"facebook/bart-large-cnn","providerModelId":"facebook/bart-large-cnn"}} } +/> diff --git a/docs/inference-providers/tasks/text-classification.md b/docs/inference-providers/tasks/text-classification.md index e12a00f93..20fcaabac 100644 --- a/docs/inference-providers/tasks/text-classification.md +++ b/docs/inference-providers/tasks/text-classification.md @@ -30,112 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.text_classification( - inputs="I like you. I love you", - model="ProsusAI/finbert", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/ProsusAI/finbert" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "I like you. I love you", -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/ProsusAI/finbert", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "I like you. I love you" }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.textClassification({ - model: "ProsusAI/finbert", - inputs: "I like you. 
I love you", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/models/ProsusAI/finbert \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "\"I like you. I love you\"" - }' -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"ProsusAI/finbert","providerModelId":"ProsusAI/finbert"}} } +/> diff --git a/docs/inference-providers/tasks/text-generation.md b/docs/inference-providers/tasks/text-generation.md index 1fe72fccc..1adecc04f 100644 --- a/docs/inference-providers/tasks/text-generation.md +++ b/docs/inference-providers/tasks/text-generation.md @@ -32,215 +32,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.text_generation( - inputs="Can you please let us know more details about your ", - model="Qwen/QwQ-32B", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "Can you please let us know more details about your ", -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "Can you please let us know more details about your " }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.textGeneration({ - model: "Qwen/QwQ-32B", - inputs: "Can you please let us know more details about your ", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "\"Can you please let us know more details about your \"" - }' -``` - - - - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="together", - api_key="hf_***", -) - -result = client.text_generation( - inputs="Can you please let us know more details about your ", - model="deepseek-ai/DeepSeek-R1", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import requests - -API_URL = "https://router.huggingface.co/together/v1/completions" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "Can you please let us know more details about your ", -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/together/v1/completions", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "Can you please let us know more details about your " }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.textGeneration({ - model: "deepseek-ai/DeepSeek-R1", - inputs: "Can you please let us know more details about your ", - provider: "together", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/together/v1/completions \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "\"Can you please let us know more details about your \"", - "model": "deepseek-ai/DeepSeek-R1" - }' -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"Qwen/QwQ-32B","providerModelId":"Qwen/QwQ-32B"},"together":{"modelId":"deepseek-ai/DeepSeek-R1","providerModelId":"deepseek-ai/DeepSeek-R1"}} } +/> diff --git a/docs/inference-providers/tasks/text-to-image.md b/docs/inference-providers/tasks/text-to-image.md index 887e04487..22dd0b5b3 100644 --- a/docs/inference-providers/tasks/text-to-image.md +++ b/docs/inference-providers/tasks/text-to-image.md @@ -30,392 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="fal-ai", - api_key="hf_***", -) - -# output is a PIL.Image object -image = client.text_to_image( - "Astronaut riding a horse", - model="black-forest-labs/FLUX.1-dev", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import fal_client - -result = fal_client.subscribe( - "fal-ai/flux/dev", - arguments={ - "prompt": "Astronaut riding a horse", - }, -) -print(result) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/fal-ai/fal-ai/flux/dev", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.blob(); - return result; -} - -query({ inputs: "Astronaut riding a horse" }).then((response) => { - // Use image -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const image = await client.textToImage({ - provider: "fal-ai", - model: "black-forest-labs/FLUX.1-dev", - inputs: "Astronaut riding a horse", - parameters: { num_inference_steps: 5 }, -}); -/// Use the generated image (it's a Blob) -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -# output is a PIL.Image object -image = client.text_to_image( - "Astronaut riding a horse", - model="black-forest-labs/FLUX.1-dev", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.content - -image_bytes = query({ - "inputs": "Astronaut riding a horse", -}) - -# You can access the image with PIL.Image for example -import io -from PIL import Image -image = Image.open(io.BytesIO(image_bytes)) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-dev", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.blob(); - return result; -} - -query({ inputs: "Astronaut riding a horse" }).then((response) => { - // Use image -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const image = await client.textToImage({ - provider: "hf-inference", - model: "black-forest-labs/FLUX.1-dev", - inputs: "Astronaut riding a horse", - parameters: { num_inference_steps: 5 }, -}); -/// Use the generated image (it's a Blob) -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
- - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="nebius", - api_key="hf_***", -) - -# output is a PIL.Image object -image = client.text_to_image( - "Astronaut riding a horse", - model="black-forest-labs/FLUX.1-dev", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/nebius/v1/images/generations", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.blob(); - return result; -} - -query({ inputs: "Astronaut riding a horse" }).then((response) => { - // Use image -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const image = await client.textToImage({ - provider: "nebius", - model: "black-forest-labs/FLUX.1-dev", - inputs: "Astronaut riding a horse", - parameters: { num_inference_steps: 5 }, -}); -/// Use the generated image (it's a Blob) -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="replicate", - api_key="hf_***", -) - -# output is a PIL.Image object -image = client.text_to_image( - "Astronaut riding a horse", - model="black-forest-labs/FLUX.1-dev", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/replicate/v1/models/black-forest-labs/flux-dev/predictions", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.blob(); - return result; -} - -query({ inputs: }).then((response) => { - // Use image -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const image = await client.textToImage({ - provider: "replicate", - model: "black-forest-labs/FLUX.1-dev", - inputs: "Astronaut riding a horse", - parameters: { num_inference_steps: 5 }, -}); -/// Use the generated image (it's a Blob) -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="together", - api_key="hf_***", -) - -# output is a PIL.Image object -image = client.text_to_image( - "Astronaut riding a horse", - model="black-forest-labs/FLUX.1-dev", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/together/v1/images/generations", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.blob(); - return result; -} - -query({ inputs: "Astronaut riding a horse" }).then((response) => { - // Use image -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const image = await client.textToImage({ - provider: "together", - model: "black-forest-labs/FLUX.1-dev", - inputs: "Astronaut riding a horse", - parameters: { num_inference_steps: 5 }, -}); -/// Use the generated image (it's a Blob) -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - + providersMapping={ {"fal-ai":{"modelId":"black-forest-labs/FLUX.1-dev","providerModelId":"fal-ai/flux/dev"},"hf-inference":{"modelId":"black-forest-labs/FLUX.1-dev","providerModelId":"black-forest-labs/FLUX.1-dev"},"nebius":{"modelId":"black-forest-labs/FLUX.1-dev","providerModelId":"black-forest-labs/flux-dev"},"replicate":{"modelId":"black-forest-labs/FLUX.1-dev","providerModelId":"black-forest-labs/flux-dev"},"together":{"modelId":"black-forest-labs/FLUX.1-dev","providerModelId":"black-forest-labs/FLUX.1-dev"}} } +/> diff --git a/docs/inference-providers/tasks/token-classification.md b/docs/inference-providers/tasks/token-classification.md index e6691c9d1..eab7124e8 100644 --- a/docs/inference-providers/tasks/token-classification.md +++ b/docs/inference-providers/tasks/token-classification.md @@ -30,112 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.token_classification( - inputs="My name is Sarah Jessica Parker but you can call me Jessica", - model="dslim/bert-base-NER", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "My name is Sarah Jessica Parker but you can call me Jessica", -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "My name is Sarah Jessica Parker but you can call me Jessica" }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.tokenClassification({ - model: "dslim/bert-base-NER", - inputs: "My name is Sarah Jessica Parker but you can call me Jessica", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/models/dslim/bert-base-NER \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "\"My name is Sarah Jessica Parker but you can call me Jessica\"" - }' -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"dslim/bert-base-NER","providerModelId":"dslim/bert-base-NER"}} } +/> diff --git a/docs/inference-providers/tasks/translation.md b/docs/inference-providers/tasks/translation.md index 1a4515ed4..6a690152e 100644 --- a/docs/inference-providers/tasks/translation.md +++ b/docs/inference-providers/tasks/translation.md @@ -30,112 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -result = client.translation( - inputs="Меня зовут Вольфганг и я живу в Берлине", - model="facebook/nllb-200-distilled-600M", -) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). 
- - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/facebook/nllb-200-distilled-600M" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "Меня зовут Вольфганг и я живу в Берлине", -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/facebook/nllb-200-distilled-600M", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ inputs: "Меня зовут Вольфганг и я живу в Берлине" }).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const output = await client.translation({ - model: "facebook/nllb-200-distilled-600M", - inputs: "Меня зовут Вольфганг и я живу в Берлине", - provider: "hf-inference", -}); - -console.log(output); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```sh -curl https://router.huggingface.co/hf-inference/models/facebook/nllb-200-distilled-600M \ - -X POST \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "inputs": "\"Меня зовут Вольфганг и я живу в Берлине\"" - }' -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"facebook/nllb-200-distilled-600M","providerModelId":"facebook/nllb-200-distilled-600M"}} } +/> diff --git a/docs/inference-providers/tasks/zero-shot-classification.md b/docs/inference-providers/tasks/zero-shot-classification.md index 170d93398..ba4c15738 100644 --- a/docs/inference-providers/tasks/zero-shot-classification.md +++ b/docs/inference-providers/tasks/zero-shot-classification.md @@ -30,74 +30,11 @@ Explore all available models and find the one that suits you best [here](https:/ ### Using the API - + - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", - "parameters": {"candidate_labels": ["refund", "legal", "faq"]}, -}) -``` - - - - - - -```js -async function query(data) { - const response = await fetch( - "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli", - { - headers: { - Authorization: "Bearer hf_***", - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({ - inputs: "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", - parameters: { candidate_labels: ["refund", "legal", "faq"] } -}).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - - - - - - -```sh -curl https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli \ - -X POST \ - -d '{"inputs": "Hi, I recently 
bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer hf_***' -``` - - - - - + providersMapping={ {"hf-inference":{"modelId":"facebook/bart-large-mnli","providerModelId":"facebook/bart-large-mnli"}} } +/> diff --git a/scripts/inference-providers/scripts/generate.ts b/scripts/inference-providers/scripts/generate.ts index fb4758544..bef7a678b 100644 --- a/scripts/inference-providers/scripts/generate.ts +++ b/scripts/inference-providers/scripts/generate.ts @@ -1,13 +1,5 @@ -import { - snippets, - INFERENCE_PROVIDERS, - InferenceProvider, -} from "@huggingface/inference"; -import { - PipelineType, - InferenceSnippet, - type ModelDataMinimal, -} from "@huggingface/tasks"; +import { INFERENCE_PROVIDERS } from "@huggingface/inference"; +import { PipelineType } from "@huggingface/tasks"; import Handlebars from "handlebars"; import * as fs from "node:fs/promises"; import * as path from "node:path/posix"; @@ -152,39 +144,6 @@ console.log(` 🕸️ Fetching ${TASKS_API_URL}`); // eslint-disable-next-line @typescript-eslint/no-explicit-any const TASKS_DATA = (await authFetchJson(TASKS_API_URL)) as any; -/////////////////////// -//// Snippet utils //// -/////////////////////// - -export function getFormattedInferenceSnippet( - pipeline_tag: PipelineType, - model: { - modelId: string; - provider: string; - providerModelId: string; - providerTask: string; - tags: string[]; - }, - conversational: boolean -): InferenceSnippet[] { - if (conversational && !model.tags.includes("conversational")) { - return []; - } - return snippets.getInferenceSnippets( - { - id: model.modelId, - pipeline_tag, - mask_token: "[MASK]", - library_name: "", - tags: conversational ? 
["conversational"] : [], - inference: "", - } as ModelDataMinimal, - "hf_***", - model.provider as InferenceProvider, - model.providerModelId - ); -} - ///////////////////// //// Specs utils //// ///////////////////// @@ -528,21 +487,40 @@ TASKS.forEach((task) => { ); }); +function buildProviderMapping( + models: { + modelId: string; + provider: string; + providerModelId: string; + providerTask: string; + tags: string[]; + }[] +): Record { + return models.reduce( + (acc, item) => { + acc[item.provider] = { + modelId: item.modelId, + providerModelId: item.providerModelId, + }; + return acc; + }, + {} as Record + ); +} + // Generate snippets TASKS.forEach((task) => { - const inferenceSnippets = DATA.perProviderWarmModels[task].flatMap((model) => - getFormattedInferenceSnippet(task, model, false).map( - (inferenceSnippet) => ({ - ...inferenceSnippet, - provider: model.provider, - }) - ) + const providersMapping = buildProviderMapping( + DATA.perProviderWarmModels[task] ); DATA.snippets[task] = SNIPPETS_TEMPLATE({ - inferenceSnippets, + task, taskSnakeCase: task.replaceAll("-", "_"), taskAttached: task.replaceAll("-", ""), + conversational: false, + hasSnippets: Object.keys(providersMapping).length > 0, + providersMappingAsStr: JSON.stringify(providersMapping), }); }); @@ -585,18 +563,17 @@ async function fetchChatCompletion() { DATA.recommendedModels["chat-completion"] = DATA.recommendedModels[ "text-generation" ].filter((model) => model.tags?.includes("conversational")); + + const providersMappingChatCompletion = buildProviderMapping( + await fetchWarmModels("text-generation") + ); DATA.snippets["chat-completion"] = SNIPPETS_TEMPLATE({ + task: "text-generation", taskSnakeCase: "chat_completion", taskAttached: "chatCompletion", - inferenceSnippets: (await fetchWarmModels("text-generation")).flatMap( - (model) => - getFormattedInferenceSnippet("text-generation", model, true).map( - (inferenceSnippet) => ({ - ...inferenceSnippet, - provider: model.provider, - }) - ) - ), + conversational: true, + hasSnippets: Object.keys(providersMappingChatCompletion).length > 0, + providersMappingAsStr: JSON.stringify(providersMappingChatCompletion), }); // Conversational image-text-to-text @@ -607,18 +584,17 @@ async function fetchChatCompletion() { DATA.recommendedModels["image-text-to-text"].filter((model) => model.tags?.includes("conversational") ); + const providersMappingImageTextToText = buildProviderMapping( + await fetchWarmModels("image-text-to-text") + ); + DATA.snippets["conversational-image-text-to-text"] = SNIPPETS_TEMPLATE({ + task: "image-text-to-text", taskSnakeCase: "chat_completion", taskAttached: "chatCompletion", - inferenceSnippets: (await fetchWarmModels("image-text-to-text")).flatMap( - (model) => - getFormattedInferenceSnippet("image-text-to-text", model, true).map( - (inferenceSnippet) => ({ - ...inferenceSnippet, - provider: model.provider, - }) - ) - ), + conversational: true, + hasSnippets: Object.keys(providersMappingImageTextToText).length > 0, + providersMappingAsStr: JSON.stringify(providersMappingImageTextToText), }); } @@ -639,6 +615,9 @@ async function renderTemplate( await Promise.all( TASKS_EXTENDED.map(async (task) => { + if (task === "image-text-to-text") { + return; // not generated -> merged with chat-completion + } // @ts-ignore const rendered = await renderTemplate(task, DATA); await writeTaskDoc(task, rendered); diff --git a/scripts/inference-providers/templates/common/snippets-template.handlebars 
b/scripts/inference-providers/templates/common/snippets-template.handlebars index 2f974071d..8c98bbc58 100644 --- a/scripts/inference-providers/templates/common/snippets-template.handlebars +++ b/scripts/inference-providers/templates/common/snippets-template.handlebars @@ -1,26 +1,10 @@ -{{#if inferenceSnippets.length }} +{{#if hasSnippets }} - - -{{#each inferenceSnippets}} - - - -```{{this.language}} -{{{this.content}}} -``` - - - -{{#if (eq this.client "huggingface_hub")}} -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}). -{{/if}} -{{#if (eq this.client "huggingface.js")}} -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#{{taskAttached}}). -{{/if}} -{{/each}} - - + {{else}} diff --git a/scripts/inference-providers/templates/task/image-text-to-text.handlebars b/scripts/inference-providers/templates/task/image-text-to-text.handlebars deleted file mode 100644 index 64995f27e..000000000 --- a/scripts/inference-providers/templates/task/image-text-to-text.handlebars +++ /dev/null @@ -1,23 +0,0 @@ -## Image-Text to Text - -Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input. - -{{{tips.linksToTaskPage.image-text-to-text}}} - -### Recommended models - -{{#each models.conversational-image-text-to-text}} -- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} -{{/each}} - -{{{tips.listModelsLink.image-text-to-text}}} - -### Using the API - -{{{snippets.conversational-image-text-to-text}}} - -### API specification - -For the API specification of conversational image-text-to-text models, please refer to the [Chat Completion API documentation](https://huggingface.co/docs/inference-providers/tasks/chat-completion#api-specification). - - From a11736a7d7260b207e7a277a4183c90fcc3d3612 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Wed, 2 Apr 2025 18:22:47 +0200 Subject: [PATCH 5/6] better? 
--- .../tasks/audio-classification.md | 3 + .../tasks/automatic-speech-recognition.md | 2 + .../tasks/chat-completion.md | 793 +----------------- .../tasks/feature-extraction.md | 3 +- docs/inference-providers/tasks/fill-mask.md | 7 +- .../tasks/image-classification.md | 3 + .../tasks/image-segmentation.md | 2 + .../tasks/object-detection.md | 1 + .../tasks/question-answering.md | 3 + .../tasks/summarization.md | 2 + .../tasks/table-question-answering.md | 2 + .../tasks/text-classification.md | 7 +- .../tasks/text-generation.md | 6 + .../tasks/text-to-image.md | 3 + .../tasks/token-classification.md | 4 + docs/inference-providers/tasks/translation.md | 4 +- .../tasks/zero-shot-classification.md | 1 + .../inference-providers/scripts/generate.ts | 23 +- .../task/audio-classification.handlebars | 2 +- .../automatic-speech-recognition.handlebars | 2 +- .../templates/task/chat-completion.handlebars | 4 +- .../task/feature-extraction.handlebars | 2 +- .../templates/task/fill-mask.handlebars | 2 +- .../task/image-classification.handlebars | 2 +- .../task/image-segmentation.handlebars | 2 +- .../templates/task/image-to-image.handlebars | 2 +- .../task/object-detection.handlebars | 2 +- .../task/question-answering.handlebars | 2 +- .../templates/task/summarization.handlebars | 2 +- .../task/table-question-answering.handlebars | 2 +- .../task/text-classification.handlebars | 2 +- .../templates/task/text-generation.handlebars | 2 +- .../templates/task/text-to-image.handlebars | 2 +- .../task/token-classification.handlebars | 2 +- .../templates/task/translation.handlebars | 2 +- .../task/zero-shot-classification.handlebars | 2 +- 36 files changed, 97 insertions(+), 810 deletions(-) diff --git a/docs/inference-providers/tasks/audio-classification.md b/docs/inference-providers/tasks/audio-classification.md index 575ee571a..b28383e60 100644 --- a/docs/inference-providers/tasks/audio-classification.md +++ b/docs/inference-providers/tasks/audio-classification.md @@ -29,6 +29,9 @@ For more details about the `audio-classification` task, check out its [dedicated ### Recommended models +- [speechbrain/google_speech_command_xvector](https://huggingface.co/speechbrain/google_speech_command_xvector): An easy-to-use model for command recognition. +- [ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition](https://huggingface.co/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition): An emotion recognition model. +- [facebook/mms-lid-126](https://huggingface.co/facebook/mms-lid-126): A language identification model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=audio-classification&sort=trending). diff --git a/docs/inference-providers/tasks/automatic-speech-recognition.md b/docs/inference-providers/tasks/automatic-speech-recognition.md index 78cedd284..1bd8eb2c0 100644 --- a/docs/inference-providers/tasks/automatic-speech-recognition.md +++ b/docs/inference-providers/tasks/automatic-speech-recognition.md @@ -29,6 +29,8 @@ For more details about the `automatic-speech-recognition` task, check out its [d ### Recommended models +- [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3): A powerful ASR model by OpenAI. +- [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/seamless-m4t-v2-large): An end-to-end model that performs ASR and Speech Translation by MetaAI. 
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=automatic-speech-recognition&sort=trending). diff --git a/docs/inference-providers/tasks/chat-completion.md b/docs/inference-providers/tasks/chat-completion.md index 75daee73b..4ad65bbc8 100644 --- a/docs/inference-providers/tasks/chat-completion.md +++ b/docs/inference-providers/tasks/chat-completion.md @@ -21,9 +21,16 @@ This is a subtask of [`text-generation`](https://huggingface.co/docs/inference-p #### Conversational Large Language Models (LLMs) +- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. +- [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models. +- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. +- [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft. +- [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code. +- [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model. #### Conversational Vision-Language Models (VLMs) +- [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct): Strong image-text-to-text model. ### API Playground @@ -51,788 +58,22 @@ The API supports: #### Code snippet example for conversational LLMs - - - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -completion = client.chat.completions.create( - model="Qwen/QwQ-32B", - messages=[ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - max_tokens=500, -) - -print(completion.choices[0].message) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1/chat/completions" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -response = query({ - "messages": [ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - "max_tokens": 500, - "model": "Qwen/QwQ-32B" -}) - -print(response["choices"][0]["message"]) -``` - - - - - - -```python -from openai import OpenAI - -client = OpenAI( - base_url="https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1", - api_key="hf_***" -) - -completion = client.chat.completions.create( - model="Qwen/QwQ-32B", - messages=[ - { - "role": "user", - "content": "What is the capital of France?" 
- } - ], - max_tokens=500, -) - -print(completion.choices[0].message) -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const chatCompletion = await client.chatCompletion({ - provider: "hf-inference", - model: "Qwen/QwQ-32B", - messages: [ - { - role: "user", - content: "What is the capital of France?", - }, - ], - max_tokens: 500, -}); - -console.log(chatCompletion.choices[0].message); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```js -import { OpenAI } from "openai"; - -const client = new OpenAI({ - baseURL: "https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1", - apiKey: "hf_***", -}); - -const chatCompletion = await client.chat.completions.create({ - model: "Qwen/QwQ-32B", - messages: [ - { - role: "user", - content: "What is the capital of France?", - }, - ], - max_tokens: 500, -}); - -console.log(chatCompletion.choices[0].message); -``` - - - - - - -```sh -curl https://router.huggingface.co/hf-inference/models/Qwen/QwQ-32B/v1/chat/completions \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "messages": [ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - "max_tokens": 500, - "model": "Qwen/QwQ-32B", - "stream": false - }' -``` - - - - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="together", - api_key="hf_***", -) - -completion = client.chat.completions.create( - model="deepseek-ai/DeepSeek-R1", - messages=[ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - max_tokens=500, -) - -print(completion.choices[0].message) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/together/v1/chat/completions" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -response = query({ - "messages": [ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - "max_tokens": 500, - "model": "deepseek-ai/DeepSeek-R1" -}) - -print(response["choices"][0]["message"]) -``` - - - - - - -```python -from openai import OpenAI - -client = OpenAI( - base_url="https://router.huggingface.co/together/v1", - api_key="hf_***" -) - -completion = client.chat.completions.create( - model="deepseek-ai/DeepSeek-R1", - messages=[ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - max_tokens=500, -) - -print(completion.choices[0].message) -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const chatCompletion = await client.chatCompletion({ - provider: "together", - model: "deepseek-ai/DeepSeek-R1", - messages: [ - { - role: "user", - content: "What is the capital of France?", - }, - ], - max_tokens: 500, -}); - -console.log(chatCompletion.choices[0].message); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). 
- - - -```js -import { OpenAI } from "openai"; - -const client = new OpenAI({ - baseURL: "https://router.huggingface.co/together/v1", - apiKey: "hf_***", -}); - -const chatCompletion = await client.chat.completions.create({ - model: "deepseek-ai/DeepSeek-R1", - messages: [ - { - role: "user", - content: "What is the capital of France?", - }, - ], - max_tokens: 500, -}); - -console.log(chatCompletion.choices[0].message); -``` - - - - - - -```sh -curl https://router.huggingface.co/together/v1/chat/completions \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "messages": [ - { - "role": "user", - "content": "What is the capital of France?" - } - ], - "max_tokens": 500, - "model": "deepseek-ai/DeepSeek-R1", - "stream": false - }' -``` - - - - - + #### Code snippet example for conversational VLMs - - - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hf-inference", - api_key="hf_***", -) - -completion = client.chat.completions.create( - model="google/gemma-3-27b-it", - messages=[ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." - }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - max_tokens=500, -) - -print(completion.choices[0].message) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -response = query({ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." - }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - "max_tokens": 500, - "model": "google/gemma-3-27b-it" -}) - -print(response["choices"][0]["message"]) -``` - - - - - - -```python -from openai import OpenAI - -client = OpenAI( - base_url="https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1", - api_key="hf_***" -) - -completion = client.chat.completions.create( - model="google/gemma-3-27b-it", - messages=[ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." 
- }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - max_tokens=500, -) - -print(completion.choices[0].message) -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const chatCompletion = await client.chatCompletion({ - provider: "hf-inference", - model: "google/gemma-3-27b-it", - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: "Describe this image in one sentence.", - }, - { - type: "image_url", - image_url: { - url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", - }, - }, - ], - }, - ], - max_tokens: 500, -}); - -console.log(chatCompletion.choices[0].message); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```js -import { OpenAI } from "openai"; - -const client = new OpenAI({ - baseURL: "https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1", - apiKey: "hf_***", -}); - -const chatCompletion = await client.chat.completions.create({ - model: "google/gemma-3-27b-it", - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: "Describe this image in one sentence.", - }, - { - type: "image_url", - image_url: { - url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", - }, - }, - ], - }, - ], - max_tokens: 500, -}); - -console.log(chatCompletion.choices[0].message); -``` - - - - - - -```sh -curl https://router.huggingface.co/hf-inference/models/google/gemma-3-27b-it/v1/chat/completions \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." - }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - "max_tokens": 500, - "model": "google/gemma-3-27b-it", - "stream": false - }' -``` - - - - - - -```python -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="hyperbolic", - api_key="hf_***", -) - -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", - messages=[ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." - }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - max_tokens=500, -) - -print(completion.choices[0].message) -``` - - - -To use the Python `InferenceClient`, see the [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.). - - - -```python -import requests - -API_URL = "https://router.huggingface.co/hyperbolic/v1/chat/completions" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -response = query({ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." 
- }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - "max_tokens": 500, - "model": "Qwen/Qwen2.5-VL-7B-Instruct" -}) - -print(response["choices"][0]["message"]) -``` - - - - - - -```python -from openai import OpenAI - -client = OpenAI( - base_url="https://router.huggingface.co/hyperbolic/v1", - api_key="hf_***" -) - -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-VL-7B-Instruct", - messages=[ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." - }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - max_tokens=500, -) - -print(completion.choices[0].message) -``` - - - - - - -```js -import { InferenceClient } from "@huggingface/inference"; - -const client = new InferenceClient("hf_***"); - -const chatCompletion = await client.chatCompletion({ - provider: "hyperbolic", - model: "Qwen/Qwen2.5-VL-7B-Instruct", - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: "Describe this image in one sentence.", - }, - { - type: "image_url", - image_url: { - url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", - }, - }, - ], - }, - ], - max_tokens: 500, -}); - -console.log(chatCompletion.choices[0].message); -``` - - - -To use the JavaScript `InferenceClient`, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/InferenceClient#). - - - -```js -import { OpenAI } from "openai"; - -const client = new OpenAI({ - baseURL: "https://router.huggingface.co/hyperbolic/v1", - apiKey: "hf_***", -}); - -const chatCompletion = await client.chat.completions.create({ - model: "Qwen/Qwen2.5-VL-7B-Instruct", - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: "Describe this image in one sentence.", - }, - { - type: "image_url", - image_url: { - url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg", - }, - }, - ], - }, - ], - max_tokens: 500, -}); - -console.log(chatCompletion.choices[0].message); -``` - - - - - - -```sh -curl https://router.huggingface.co/hyperbolic/v1/chat/completions \ - -H 'Authorization: Bearer hf_***' \ - -H 'Content-Type: application/json' \ - -d '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Describe this image in one sentence." - }, - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" - } - } - ] - } - ], - "max_tokens": 500, - "model": "Qwen/Qwen2.5-VL-7B-Instruct", - "stream": false - }' -``` - - - - - + diff --git a/docs/inference-providers/tasks/feature-extraction.md b/docs/inference-providers/tasks/feature-extraction.md index de1d85461..7af7743a6 100644 --- a/docs/inference-providers/tasks/feature-extraction.md +++ b/docs/inference-providers/tasks/feature-extraction.md @@ -29,6 +29,7 @@ For more details about the `feature-extraction` task, check out its [dedicated p ### Recommended models +- [thenlper/gte-large](https://huggingface.co/thenlper/gte-large): A powerful feature extraction model for natural language processing tasks. 
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=feature-extraction&sort=trending). @@ -38,7 +39,7 @@ Explore all available models and find the one that suits you best [here](https:/ diff --git a/docs/inference-providers/tasks/fill-mask.md b/docs/inference-providers/tasks/fill-mask.md index f23053874..d5331a9fe 100644 --- a/docs/inference-providers/tasks/fill-mask.md +++ b/docs/inference-providers/tasks/fill-mask.md @@ -24,17 +24,14 @@ For more details about the `fill-mask` task, check out its [dedicated page](http ### Recommended models +- [FacebookAI/xlm-roberta-base](https://huggingface.co/FacebookAI/xlm-roberta-base): A multilingual model trained on 100 languages. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=fill-mask&sort=trending). ### Using the API - +No snippet available for this task. diff --git a/docs/inference-providers/tasks/image-classification.md b/docs/inference-providers/tasks/image-classification.md index ec999d3db..a71c03e03 100644 --- a/docs/inference-providers/tasks/image-classification.md +++ b/docs/inference-providers/tasks/image-classification.md @@ -24,6 +24,9 @@ For more details about the `image-classification` task, check out its [dedicated ### Recommended models +- [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224): A strong image classification model. +- [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224): A robust image classification model. +- [facebook/convnext-large-224](https://huggingface.co/facebook/convnext-large-224): A strong image classification model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-classification&sort=trending). diff --git a/docs/inference-providers/tasks/image-segmentation.md b/docs/inference-providers/tasks/image-segmentation.md index df1510e4f..eb5f8709c 100644 --- a/docs/inference-providers/tasks/image-segmentation.md +++ b/docs/inference-providers/tasks/image-segmentation.md @@ -24,6 +24,8 @@ For more details about the `image-segmentation` task, check out its [dedicated p ### Recommended models +- [openmmlab/upernet-convnext-small](https://huggingface.co/openmmlab/upernet-convnext-small): Solid semantic segmentation model trained on ADE20k. +- [facebook/mask2former-swin-large-coco-panoptic](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic): Panoptic segmentation model trained on the COCO (common objects) dataset. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending). diff --git a/docs/inference-providers/tasks/object-detection.md b/docs/inference-providers/tasks/object-detection.md index a96fbe2dc..cf6ca5d7d 100644 --- a/docs/inference-providers/tasks/object-detection.md +++ b/docs/inference-providers/tasks/object-detection.md @@ -24,6 +24,7 @@ For more details about the `object-detection` task, check out its [dedicated pag ### Recommended models +- [facebook/detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50): Solid object detection model pre-trained on the COCO 2017 dataset. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=object-detection&sort=trending). 
diff --git a/docs/inference-providers/tasks/question-answering.md b/docs/inference-providers/tasks/question-answering.md index 43db30cf4..77728499a 100644 --- a/docs/inference-providers/tasks/question-answering.md +++ b/docs/inference-providers/tasks/question-answering.md @@ -24,6 +24,9 @@ For more details about the `question-answering` task, check out its [dedicated p ### Recommended models +- [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2): A robust baseline model for most question answering domains. +- [distilbert/distilbert-base-cased-distilled-squad](https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad): Small yet robust model that can answer questions. +- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A special model that can answer questions from tables. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=question-answering&sort=trending). diff --git a/docs/inference-providers/tasks/summarization.md b/docs/inference-providers/tasks/summarization.md index f1296b139..d2e711569 100644 --- a/docs/inference-providers/tasks/summarization.md +++ b/docs/inference-providers/tasks/summarization.md @@ -24,6 +24,8 @@ For more details about the `summarization` task, check out its [dedicated page]( ### Recommended models +- [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn): A strong summarization model trained on English news articles. Excels at generating factual summaries. +- [Falconsai/medical_summarization](https://huggingface.co/Falconsai/medical_summarization): A summarization model trained on medical articles. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=summarization&sort=trending). diff --git a/docs/inference-providers/tasks/table-question-answering.md b/docs/inference-providers/tasks/table-question-answering.md index 8ec2088ea..8e834d0c7 100644 --- a/docs/inference-providers/tasks/table-question-answering.md +++ b/docs/inference-providers/tasks/table-question-answering.md @@ -24,6 +24,8 @@ For more details about the `table-question-answering` task, check out its [dedic ### Recommended models +- [microsoft/tapex-base](https://huggingface.co/microsoft/tapex-base): A table question answering model that is capable of neural SQL execution, i.e., employ TAPEX to execute a SQL query on a given table. +- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A robust table question answering model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=table-question-answering&sort=trending). diff --git a/docs/inference-providers/tasks/text-classification.md b/docs/inference-providers/tasks/text-classification.md index 20fcaabac..32a5e5b2a 100644 --- a/docs/inference-providers/tasks/text-classification.md +++ b/docs/inference-providers/tasks/text-classification.md @@ -24,6 +24,11 @@ For more details about the `text-classification` task, check out its [dedicated ### Recommended models +- [distilbert/distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english): A robust model trained for sentiment analysis. +- [ProsusAI/finbert](https://huggingface.co/ProsusAI/finbert): A sentiment analysis model specialized in financial sentiment. 
+- [cardiffnlp/twitter-roberta-base-sentiment-latest](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest): A sentiment analysis model specialized in analyzing tweets. +- [papluca/xlm-roberta-base-language-detection](https://huggingface.co/papluca/xlm-roberta-base-language-detection): A model that can classify languages. +- [meta-llama/Prompt-Guard-86M](https://huggingface.co/meta-llama/Prompt-Guard-86M): A model that can classify text generation attacks. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-classification&sort=trending). @@ -33,7 +38,7 @@ Explore all available models and find the one that suits you best [here](https:/ diff --git a/docs/inference-providers/tasks/text-generation.md b/docs/inference-providers/tasks/text-generation.md index 1adecc04f..2b973a75f 100644 --- a/docs/inference-providers/tasks/text-generation.md +++ b/docs/inference-providers/tasks/text-generation.md @@ -26,6 +26,12 @@ For more details about the `text-generation` task, check out its [dedicated page ### Recommended models +- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. +- [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models. +- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. +- [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft. +- [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code. +- [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending). diff --git a/docs/inference-providers/tasks/text-to-image.md b/docs/inference-providers/tasks/text-to-image.md index 22dd0b5b3..7a6f19808 100644 --- a/docs/inference-providers/tasks/text-to-image.md +++ b/docs/inference-providers/tasks/text-to-image.md @@ -24,6 +24,9 @@ For more details about the `text-to-image` task, check out its [dedicated page]( ### Recommended models +- [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev): One of the most powerful image generation models that can generate realistic outputs. +- [Kwai-Kolors/Kolors](https://huggingface.co/Kwai-Kolors/Kolors): Text-to-image model for photorealistic generation. +- [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers): A powerful text-to-image model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-to-image&sort=trending). 
diff --git a/docs/inference-providers/tasks/token-classification.md b/docs/inference-providers/tasks/token-classification.md
index eab7124e8..57a953648 100644
--- a/docs/inference-providers/tasks/token-classification.md
+++ b/docs/inference-providers/tasks/token-classification.md
@@ -24,6 +24,10 @@ For more details about the `token-classification` task, check out its [dedicated
 ### Recommended models
+- [dslim/bert-base-NER](https://huggingface.co/dslim/bert-base-NER): A robust model for identifying people, locations, organizations and names of miscellaneous entities.
+- [FacebookAI/xlm-roberta-large-finetuned-conll03-english](https://huggingface.co/FacebookAI/xlm-roberta-large-finetuned-conll03-english): A strong model to identify people, locations, organizations and names in multiple languages.
+- [blaze999/Medical-NER](https://huggingface.co/blaze999/Medical-NER): A token classification model specialized in medical entity recognition.
+- [flair/ner-english](https://huggingface.co/flair/ner-english): Flair models are typically the state of the art in named entity recognition tasks.
 Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=token-classification&sort=trending).
diff --git a/docs/inference-providers/tasks/translation.md b/docs/inference-providers/tasks/translation.md
index 6a690152e..6016b3af4 100644
--- a/docs/inference-providers/tasks/translation.md
+++ b/docs/inference-providers/tasks/translation.md
@@ -24,6 +24,8 @@ For more details about the `translation` task, check out its [dedicated page](ht
 ### Recommended models
+- [facebook/nllb-200-1.3B](https://huggingface.co/facebook/nllb-200-1.3B): Very powerful model that can translate between many languages, especially low-resource ones.
+- [google-t5/t5-base](https://huggingface.co/google-t5/t5-base): A general-purpose Transformer that can be used to translate from English to German, French, or Romanian.
 Explore all available models and find the one that suits you best [here](https:/
@@ -33,7 +35,7 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/zero-shot-classification.md b/docs/inference-providers/tasks/zero-shot-classification.md
index ba4c15738..93b9f109b 100644
--- a/docs/inference-providers/tasks/zero-shot-classification.md
+++ b/docs/inference-providers/tasks/zero-shot-classification.md
@@ -24,6 +24,7 @@ For more details about the `zero-shot-classification` task, check out its [dedic
 ### Recommended models
+- [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli): Powerful zero-shot text classification model.
 Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=zero-shot-classification&sort=trending).
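Along the same lines, a minimal sketch of how the newly recommended `facebook/bart-large-mnli` checkpoint could be called for zero-shot classification through the `InferenceClient`; the input text and candidate labels are illustrative assumptions, not values taken from the diff:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key="hf_***",
)

# Candidate labels are passed alongside the text; no fine-tuning is required.
result = client.zero_shot_classification(
    "I really want my money back!",
    ["refund", "legal", "faq"],
    model="facebook/bart-large-mnli",
)
print(result)  # list of candidate labels with their confidence scores
```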
diff --git a/scripts/inference-providers/scripts/generate.ts b/scripts/inference-providers/scripts/generate.ts
index bef7a678b..b8fe1d36c 100644
--- a/scripts/inference-providers/scripts/generate.ts
+++ b/scripts/inference-providers/scripts/generate.ts
@@ -405,18 +405,23 @@ await Promise.all(
         id: string;
         description: string;
         inference: string | undefined;
+        tags: string[] | undefined;
       }) => {
         console.log(` ⚡ Checking inference status ${model.id}`);
         let url = `https://huggingface.co/api/models/${model.id}?expand[]=inference&expand[]=tags`;
         const modelData = await authFetchJson(url);
         model.inference = modelData.inference;
+        model.tags = modelData.tags;
       }
     )
   );
 })
 );
-async function fetchWarmModels(task: PipelineType): Promise<
+async function fetchWarmModels(
+  task: PipelineType,
+  conversational: boolean = false
+): Promise<
   {
     modelId: string;
     provider: string;
@@ -425,10 +430,14 @@ async function fetchWarmModels(task: PipelineType): Promise<
     tags: string[];
   }[]
 > {
-  const providers = [
-    "hf-inference",
-    ...(PER_TASK_SUPPORTED_PROVIDERS[task] ?? []),
-  ].sort();
+  const providers = (
+    conversational
+      ? [
+          "hf-inference",
+          ...(PER_TASK_SUPPORTED_PROVIDERS["conversational"] ?? []),
+        ]
+      : ["hf-inference", ...(PER_TASK_SUPPORTED_PROVIDERS[task] ?? [])]
+  ).sort();
   return (
     await Promise.all(
       providers.map(async (provider) => {
@@ -565,7 +574,7 @@ async function fetchChatCompletion() {
   ].filter((model) => model.tags?.includes("conversational"));
   const providersMappingChatCompletion = buildProviderMapping(
-    await fetchWarmModels("text-generation")
+    await fetchWarmModels("text-generation", true)
   );
   DATA.snippets["chat-completion"] = SNIPPETS_TEMPLATE({
     task: "text-generation",
@@ -585,7 +594,7 @@
     model.tags?.includes("conversational")
   );
   const providersMappingImageTextToText = buildProviderMapping(
-    await fetchWarmModels("image-text-to-text")
+    await fetchWarmModels("image-text-to-text", true)
   );
   DATA.snippets["conversational-image-text-to-text"] = SNIPPETS_TEMPLATE({
diff --git a/scripts/inference-providers/templates/task/audio-classification.handlebars b/scripts/inference-providers/templates/task/audio-classification.handlebars
index 8530b7de2..30a153ced 100644
--- a/scripts/inference-providers/templates/task/audio-classification.handlebars
+++ b/scripts/inference-providers/templates/task/audio-classification.handlebars
@@ -11,7 +11,7 @@ Example applications:
 ### Recommended models
-{{#each models.audio-classification}}
+{{#each recommendedModels.audio-classification}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/automatic-speech-recognition.handlebars b/scripts/inference-providers/templates/task/automatic-speech-recognition.handlebars
index fc81651df..7836c11b9 100644
--- a/scripts/inference-providers/templates/task/automatic-speech-recognition.handlebars
+++ b/scripts/inference-providers/templates/task/automatic-speech-recognition.handlebars
@@ -11,7 +11,7 @@ Example applications:
 ### Recommended models
-{{#each models.automatic-speech-recognition}}
+{{#each recommendedModels.automatic-speech-recognition}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/chat-completion.handlebars b/scripts/inference-providers/templates/task/chat-completion.handlebars
index da32ea5f1..01512b081 100644
--- a/scripts/inference-providers/templates/task/chat-completion.handlebars
+++ b/scripts/inference-providers/templates/task/chat-completion.handlebars
@@ -7,13 +7,13 @@ This is a subtask of [`text-generation`](https://huggingface.co/docs/inference-p
 #### Conversational Large Language Models (LLMs)
-{{#each models.chat-completion}}
+{{#each recommendedModels.chat-completion}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
 #### Conversational Vision-Language Models (VLMs)
-{{#each models.conversational-image-text-to-text}}
+{{#each recommendedModels.conversational-image-text-to-text}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/feature-extraction.handlebars b/scripts/inference-providers/templates/task/feature-extraction.handlebars
index 0b7b9748f..e31c2ecdf 100644
--- a/scripts/inference-providers/templates/task/feature-extraction.handlebars
+++ b/scripts/inference-providers/templates/task/feature-extraction.handlebars
@@ -11,7 +11,7 @@ Example applications:
 ### Recommended models
-{{#each models.feature-extraction}}
+{{#each recommendedModels.feature-extraction}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/fill-mask.handlebars b/scripts/inference-providers/templates/task/fill-mask.handlebars
index c9c131e22..0784ce319 100644
--- a/scripts/inference-providers/templates/task/fill-mask.handlebars
+++ b/scripts/inference-providers/templates/task/fill-mask.handlebars
@@ -6,7 +6,7 @@ Mask filling is the task of predicting the right word (token to be precise) in t
 ### Recommended models
-{{#each models.fill-mask}}
+{{#each recommendedModels.fill-mask}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/image-classification.handlebars b/scripts/inference-providers/templates/task/image-classification.handlebars
index 96a6ff49a..ad37828b1 100644
--- a/scripts/inference-providers/templates/task/image-classification.handlebars
+++ b/scripts/inference-providers/templates/task/image-classification.handlebars
@@ -6,7 +6,7 @@ Image classification is the task of assigning a label or class to an entire imag
 ### Recommended models
-{{#each models.image-classification}}
+{{#each recommendedModels.image-classification}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/image-segmentation.handlebars b/scripts/inference-providers/templates/task/image-segmentation.handlebars
index 11ea77f47..8e27797e2 100644
--- a/scripts/inference-providers/templates/task/image-segmentation.handlebars
+++ b/scripts/inference-providers/templates/task/image-segmentation.handlebars
@@ -6,7 +6,7 @@ Image Segmentation divides an image into segments where each pixel in the image
 ### Recommended models
-{{#each models.image-segmentation}}
+{{#each recommendedModels.image-segmentation}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/image-to-image.handlebars b/scripts/inference-providers/templates/task/image-to-image.handlebars
index ba21bf4fe..2d9ae5bfd 100644
--- a/scripts/inference-providers/templates/task/image-to-image.handlebars
+++ b/scripts/inference-providers/templates/task/image-to-image.handlebars
@@ -11,7 +11,7 @@ Example applications:
 ### Recommended models
-{{#each models.image-to-image}}
+{{#each recommendedModels.image-to-image}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/object-detection.handlebars b/scripts/inference-providers/templates/task/object-detection.handlebars
index 3892e34a3..8d4a7ad22 100644
--- a/scripts/inference-providers/templates/task/object-detection.handlebars
+++ b/scripts/inference-providers/templates/task/object-detection.handlebars
@@ -6,7 +6,7 @@ Object Detection models allow users to identify objects of certain defined class
 ### Recommended models
-{{#each models.object-detection}}
+{{#each recommendedModels.object-detection}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/question-answering.handlebars b/scripts/inference-providers/templates/task/question-answering.handlebars
index 3ca4e93d3..39b77a601 100644
--- a/scripts/inference-providers/templates/task/question-answering.handlebars
+++ b/scripts/inference-providers/templates/task/question-answering.handlebars
@@ -6,7 +6,7 @@ Question Answering models can retrieve the answer to a question from a given tex
 ### Recommended models
-{{#each models.question-answering}}
+{{#each recommendedModels.question-answering}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/summarization.handlebars b/scripts/inference-providers/templates/task/summarization.handlebars
index 1df382189..2df7ab361 100644
--- a/scripts/inference-providers/templates/task/summarization.handlebars
+++ b/scripts/inference-providers/templates/task/summarization.handlebars
@@ -6,7 +6,7 @@ Summarization is the task of producing a shorter version of a document while pre
 ### Recommended models
-{{#each models.summarization}}
+{{#each recommendedModels.summarization}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/table-question-answering.handlebars b/scripts/inference-providers/templates/task/table-question-answering.handlebars
index 087ff53bf..d72fbfa69 100644
--- a/scripts/inference-providers/templates/task/table-question-answering.handlebars
+++ b/scripts/inference-providers/templates/task/table-question-answering.handlebars
@@ -6,7 +6,7 @@ Table Question Answering (Table QA) is the answering a question about an informa
 ### Recommended models
-{{#each models.table-question-answering}}
+{{#each recommendedModels.table-question-answering}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/text-classification.handlebars b/scripts/inference-providers/templates/task/text-classification.handlebars
index 123d1f92a..6bf151da5 100644
--- a/scripts/inference-providers/templates/task/text-classification.handlebars
+++ b/scripts/inference-providers/templates/task/text-classification.handlebars
@@ -6,7 +6,7 @@ Text Classification is the task of assigning a label or class to a given text. S
 ### Recommended models
-{{#each models.text-classification}}
+{{#each recommendedModels.text-classification}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/text-generation.handlebars b/scripts/inference-providers/templates/task/text-generation.handlebars
index 9720cc175..a67cb55e6 100644
--- a/scripts/inference-providers/templates/task/text-generation.handlebars
+++ b/scripts/inference-providers/templates/task/text-generation.handlebars
@@ -8,7 +8,7 @@ If you are interested in a Chat Completion task, which generates a response base
 ### Recommended models
-{{#each models.text-generation}}
+{{#each recommendedModels.text-generation}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/text-to-image.handlebars b/scripts/inference-providers/templates/task/text-to-image.handlebars
index ac65056e6..58634c514 100644
--- a/scripts/inference-providers/templates/task/text-to-image.handlebars
+++ b/scripts/inference-providers/templates/task/text-to-image.handlebars
@@ -6,7 +6,7 @@ Generate an image based on a given text prompt.
 ### Recommended models
-{{#each models.text-to-image}}
+{{#each recommendedModels.text-to-image}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/token-classification.handlebars b/scripts/inference-providers/templates/task/token-classification.handlebars
index 9045de0ba..a342c4bb2 100644
--- a/scripts/inference-providers/templates/task/token-classification.handlebars
+++ b/scripts/inference-providers/templates/task/token-classification.handlebars
@@ -6,7 +6,7 @@ Token classification is a task in which a label is assigned to some tokens in a
 ### Recommended models
-{{#each models.token-classification}}
+{{#each recommendedModels.token-classification}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/translation.handlebars b/scripts/inference-providers/templates/task/translation.handlebars
index 7cbede05d..a161a3f9e 100644
--- a/scripts/inference-providers/templates/task/translation.handlebars
+++ b/scripts/inference-providers/templates/task/translation.handlebars
@@ -6,7 +6,7 @@ Translation is the task of converting text from one language to another.
 ### Recommended models
-{{#each models.translation}}
+{{#each recommendedModels.translation}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}
diff --git a/scripts/inference-providers/templates/task/zero-shot-classification.handlebars b/scripts/inference-providers/templates/task/zero-shot-classification.handlebars
index e0e830e93..4025e3726 100644
--- a/scripts/inference-providers/templates/task/zero-shot-classification.handlebars
+++ b/scripts/inference-providers/templates/task/zero-shot-classification.handlebars
@@ -6,7 +6,7 @@ Zero-shot text classification is super useful to try out classification with zer
 ### Recommended models
-{{#each models.zero-shot-classification}}
+{{#each recommendedModels.zero-shot-classification}}
 - [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}}
 {{/each}}

From b28b739f23dd6f559a3049a5fbf82cc549c0c012 Mon Sep 17 00:00:00 2001
From: Wauplin 
Date: Wed, 2 Apr 2025 18:28:48 +0200
Subject: [PATCH 6/6] typo

---
 .../tasks/automatic-speech-recognition.md | 1 -
 docs/inference-providers/tasks/chat-completion.md | 6 ++----
 docs/inference-providers/tasks/feature-extraction.md | 1 -
 docs/inference-providers/tasks/image-classification.md | 1 -
 docs/inference-providers/tasks/image-segmentation.md | 1 -
 docs/inference-providers/tasks/image-to-image.md | 1 -
 docs/inference-providers/tasks/object-detection.md | 1 -
 docs/inference-providers/tasks/question-answering.md | 1 -
 docs/inference-providers/tasks/summarization.md | 1 -
 docs/inference-providers/tasks/text-classification.md | 1 -
 docs/inference-providers/tasks/text-generation.md | 1 -
 docs/inference-providers/tasks/text-to-image.md | 1 -
 docs/inference-providers/tasks/token-classification.md | 1 -
 docs/inference-providers/tasks/translation.md | 1 -
 docs/inference-providers/tasks/zero-shot-classification.md | 1 -
 .../templates/common/snippets-template.handlebars | 3 +--
 16 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/docs/inference-providers/tasks/automatic-speech-recognition.md b/docs/inference-providers/tasks/automatic-speech-recognition.md
index 1bd8eb2c0..5b6d71362 100644
--- a/docs/inference-providers/tasks/automatic-speech-recognition.md
+++ b/docs/inference-providers/tasks/automatic-speech-recognition.md
@@ -39,7 +39,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/chat-completion.md b/docs/inference-providers/tasks/chat-completion.md
index 4ad65bbc8..4844be8e6 100644
--- a/docs/inference-providers/tasks/chat-completion.md
+++ b/docs/inference-providers/tasks/chat-completion.md
@@ -60,9 +60,8 @@ The API supports:
+conversational />
@@ -71,9 +70,8 @@ conversational
+conversational />
diff --git a/docs/inference-providers/tasks/feature-extraction.md b/docs/inference-providers/tasks/feature-extraction.md
index 7af7743a6..7ed41932a 100644
--- a/docs/inference-providers/tasks/feature-extraction.md
+++ b/docs/inference-providers/tasks/feature-extraction.md
@@ -38,7 +38,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/image-classification.md b/docs/inference-providers/tasks/image-classification.md
index a71c03e03..0feb60ff9 100644
--- a/docs/inference-providers/tasks/image-classification.md
+++ b/docs/inference-providers/tasks/image-classification.md
@@ -35,7 +35,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/image-segmentation.md b/docs/inference-providers/tasks/image-segmentation.md
index eb5f8709c..4e4942717 100644
--- a/docs/inference-providers/tasks/image-segmentation.md
+++ b/docs/inference-providers/tasks/image-segmentation.md
@@ -34,7 +34,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/image-to-image.md b/docs/inference-providers/tasks/image-to-image.md
index fa29b62f5..908b6d393 100644
--- a/docs/inference-providers/tasks/image-to-image.md
+++ b/docs/inference-providers/tasks/image-to-image.md
@@ -37,7 +37,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/object-detection.md b/docs/inference-providers/tasks/object-detection.md
index cf6ca5d7d..c45de8a1c 100644
--- a/docs/inference-providers/tasks/object-detection.md
+++ b/docs/inference-providers/tasks/object-detection.md
@@ -33,7 +33,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/question-answering.md b/docs/inference-providers/tasks/question-answering.md
index 77728499a..7d72c9ef1 100644
--- a/docs/inference-providers/tasks/question-answering.md
+++ b/docs/inference-providers/tasks/question-answering.md
@@ -35,7 +35,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/summarization.md b/docs/inference-providers/tasks/summarization.md
index d2e711569..025b7e260 100644
--- a/docs/inference-providers/tasks/summarization.md
+++ b/docs/inference-providers/tasks/summarization.md
@@ -34,7 +34,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/text-classification.md b/docs/inference-providers/tasks/text-classification.md
index 32a5e5b2a..5c9e5b4de 100644
--- a/docs/inference-providers/tasks/text-classification.md
+++ b/docs/inference-providers/tasks/text-classification.md
@@ -37,7 +37,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/text-generation.md b/docs/inference-providers/tasks/text-generation.md
index 2b973a75f..84f0c1282 100644
--- a/docs/inference-providers/tasks/text-generation.md
+++ b/docs/inference-providers/tasks/text-generation.md
@@ -40,7 +40,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/text-to-image.md b/docs/inference-providers/tasks/text-to-image.md
index 7a6f19808..b18654994 100644
--- a/docs/inference-providers/tasks/text-to-image.md
+++ b/docs/inference-providers/tasks/text-to-image.md
@@ -35,7 +35,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/token-classification.md b/docs/inference-providers/tasks/token-classification.md
index 57a953648..4c49405fc 100644
--- a/docs/inference-providers/tasks/token-classification.md
+++ b/docs/inference-providers/tasks/token-classification.md
@@ -36,7 +36,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/translation.md b/docs/inference-providers/tasks/translation.md
index 6016b3af4..261f13bca 100644
--- a/docs/inference-providers/tasks/translation.md
+++ b/docs/inference-providers/tasks/translation.md
@@ -34,7 +34,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/docs/inference-providers/tasks/zero-shot-classification.md b/docs/inference-providers/tasks/zero-shot-classification.md
index 93b9f109b..12eaca422 100644
--- a/docs/inference-providers/tasks/zero-shot-classification.md
+++ b/docs/inference-providers/tasks/zero-shot-classification.md
@@ -33,7 +33,6 @@ Explore all available models and find the one that suits you best [here](https:/
diff --git a/scripts/inference-providers/templates/common/snippets-template.handlebars b/scripts/inference-providers/templates/common/snippets-template.handlebars
index 8c98bbc58..0be6fdea4 100644
--- a/scripts/inference-providers/templates/common/snippets-template.handlebars
+++ b/scripts/inference-providers/templates/common/snippets-template.handlebars
@@ -2,9 +2,8 @@
+{{#if conversational }}conversational {{/if}}/>
 {{else}}