
Commit 2de6550 (parent: 246e873)

Change default InferenceClient model to Qwen/Qwen3-Next-80B-A3B-Thinking (#1813)

File tree: 22 files changed (+39, −39)


README.md

Lines changed: 2 additions & 2 deletions
@@ -137,7 +137,7 @@ model = OpenAIModel(
 from smolagents import TransformersModel
 
 model = TransformersModel(
-    model_id="Qwen/Qwen3-4B-Instruct-2507",
+    model_id="Qwen/Qwen3-Next-80B-A3B-Thinking",
     max_new_tokens=4096,
     device_map="auto"
 )
@@ -178,7 +178,7 @@ You can run agents from CLI using two commands: `smolagent` and `webagent`.
 `smolagent` is a generalist command to run a multi-step `CodeAgent` that can be equipped with various tools.
 
 ```bash
-smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7." --model-type "InferenceClientModel" --model-id "Qwen/Qwen3-Next-80B-A3B-Instruct" --imports pandas numpy --tools web_search
+smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7." --model-type "InferenceClientModel" --model-id "Qwen/Qwen3-Next-80B-A3B-Thinking" --imports pandas numpy --tools web_search
 ```
 
 Meanwhile `webagent` is a specific web-browsing agent using [helium](https://github.com/mherrmann/helium) (read more [here](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)).

docs/source/en/examples/async_agent.md

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ from starlette.routing import Route
 from smolagents import CodeAgent, InferenceClientModel
 
 agent = CodeAgent(
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
     tools=[],
 )

docs/source/en/examples/multiagents.md

Lines changed: 2 additions & 2 deletions
@@ -36,13 +36,13 @@ from huggingface_hub import login
 login()
 ```
 
-⚡️ Our agent will be powered by [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) using `InferenceClientModel` class that uses HF's Inference API: the Inference API allows to quickly and easily run any OS model.
+⚡️ Our agent will be powered by [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking) using `InferenceClientModel` class that uses HF's Inference API: the Inference API allows to quickly and easily run any OS model.
 
 > [!TIP]
 > Inference Providers give access to hundreds of models, powered by serverless inference partners. A list of supported providers can be found [here](https://huggingface.co/docs/inference-providers/index).
 
 ```py
-model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+model_id = "Qwen/Qwen3-Next-80B-A3B-Thinking"
 ```
 
 ## 🔍 Create a web search tool

docs/source/en/examples/rag.md

Lines changed: 1 addition & 1 deletion
@@ -156,7 +156,7 @@ from smolagents import InferenceClientModel, CodeAgent
 # Initialize the agent with our retriever tool
 agent = CodeAgent(
     tools=[retriever_tool],  # List of tools available to the agent
-    model=InferenceClientModel(),  # Default model "Qwen/Qwen2.5-Coder-32B-Instruct"
+    model=InferenceClientModel(),  # Default model "Qwen/Qwen3-Next-80B-A3B-Thinking"
     max_steps=4,  # Limit the number of reasoning steps
     verbosity_level=2,  # Show detailed agent reasoning
 )

docs/source/en/examples/text_to_sql.md

Lines changed: 2 additions & 2 deletions
@@ -175,14 +175,14 @@ for table in ["receipts", "waiters"]:
 
 print(updated_description)
 ```
-Since this request is a bit harder than the previous one, we’ll switch the LLM engine to use the more powerful [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)!
+Since this request is a bit harder than the previous one, we’ll switch the LLM engine to use the more powerful [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking)!
 
 ```py
 sql_engine.description = updated_description
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
 )
 
 agent.run("Which waiter got more total money from tips?")

docs/source/en/tutorials/tools.md

Lines changed: 2 additions & 2 deletions
@@ -275,7 +275,7 @@ Then you can use this tool just like any other tool. For example, let's improve
 ```python
 from smolagents import CodeAgent, InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 agent = CodeAgent(tools=[image_generation_tool], model=model)
 
 agent.run(
@@ -323,7 +323,7 @@ Let's add the `model_download_tool` to an existing agent initialized with only t
 ```python
 from smolagents import InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 agent.tools[model_download_tool.name] = model_download_tool

docs/source/hi/examples/multiagents.md

Lines changed: 2 additions & 2 deletions
@@ -39,12 +39,12 @@ from huggingface_hub import login
 login()
 ```
 
-⚡️ Our agent will be powered by [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct), using the `InferenceClientModel` class, which relies on HF's Inference API: the Inference API lets you run any OS model quickly and easily.
+⚡️ Our agent will be powered by [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking), using the `InferenceClientModel` class, which relies on HF's Inference API: the Inference API lets you run any OS model quickly and easily.
 
 _Note:_ The Inference API hosts models based on various criteria, and deployed models may be updated or replaced without prior notice. Learn more about it [here](https://huggingface.co/docs/api-inference/supported-models).
 
 ```py
-model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+model_id = "Qwen/Qwen3-Next-80B-A3B-Thinking"
 ```
 
 ## 🔍 Create a web search tool

docs/source/hi/examples/text_to_sql.md

Lines changed: 2 additions & 2 deletions
@@ -166,14 +166,14 @@ for table in ["receipts", "waiters"]:
 
 print(updated_description)
 ```
-Since this request is a bit harder than the previous one, we'll switch the LLM engine to use the more powerful [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)!
+Since this request is a bit harder than the previous one, we'll switch the LLM engine to use the more powerful [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking)!
 
 ```py
 sql_engine.description = updated_description
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
 )
 
 agent.run("Which waiter got more total money from tips?")

docs/source/hi/tutorials/tools.md

Lines changed: 2 additions & 2 deletions
@@ -121,7 +121,7 @@ image_generation_tool("A sunny beach")
 ```python
 from smolagents import CodeAgent, InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 agent = CodeAgent(tools=[image_generation_tool], model=model)
 
 agent.run(
@@ -169,7 +169,7 @@ agent.run("How many more blocks (also denoted as layers) are in BERT base encode
 ```python
 from smolagents import InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 agent.tools[model_download_tool.name] = model_download_tool

docs/source/ko/examples/async_agent.md

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ from starlette.routing import Route
 from smolagents import CodeAgent, InferenceClientModel
 
 agent = CodeAgent(
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
     tools=[],
 )

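The diffs above repeat a single mechanical substitution across all 22 files: each outdated default model ID is replaced by the new default. A minimal sketch of that substitution (the ID strings are taken from this commit; `swap_default_model` is a hypothetical helper for illustration, not part of smolagents):

```python
# Old default model IDs that appear in the removed (-) lines of this commit,
# and the new default that replaces them in the added (+) lines.
OLD_IDS = (
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "Qwen/Qwen3-4B-Instruct-2507",
    "Qwen/Qwen3-Next-80B-A3B-Instruct",
)
NEW_ID = "Qwen/Qwen3-Next-80B-A3B-Thinking"

def swap_default_model(text: str) -> str:
    """Replace every occurrence of an outdated default model ID with the new one."""
    for old in OLD_IDS:
        text = text.replace(old, NEW_ID)
    return text

snippet = 'model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")'
print(swap_default_model(snippet))
# → model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
```

A plain `str.replace` works here because every changed line contains the full old ID verbatim; a real docs-wide migration might instead use a regex anchored to `model_id=` to avoid touching unrelated mentions.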
0 commit comments
