diff --git a/README.md b/README.md
index 2161015b3..dac23c04b 100644
--- a/README.md
+++ b/README.md
@@ -137,7 +137,7 @@ model = OpenAIModel(
 from smolagents import TransformersModel
 
 model = TransformersModel(
-    model_id="Qwen/Qwen3-4B-Instruct-2507",
+    model_id="Qwen/Qwen3-Next-80B-A3B-Thinking",
     max_new_tokens=4096,
     device_map="auto"
 )
@@ -178,7 +178,7 @@ You can run agents from CLI using two commands: `smolagent` and `webagent`.
 `smolagent` is a generalist command to run a multi-step `CodeAgent` that can be equipped with various tools.
 
 ```bash
-smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7." --model-type "InferenceClientModel" --model-id "Qwen/Qwen3-Next-80B-A3B-Instruct" --imports pandas numpy --tools web_search
+smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7." --model-type "InferenceClientModel" --model-id "Qwen/Qwen3-Next-80B-A3B-Thinking" --imports pandas numpy --tools web_search
 ```
 
 Meanwhile `webagent` is a specific web-browsing agent using [helium](https://github.com/mherrmann/helium) (read more [here](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)).
diff --git a/docs/source/en/examples/async_agent.md b/docs/source/en/examples/async_agent.md
index d265fbdda..6b33a2345 100644
--- a/docs/source/en/examples/async_agent.md
+++ b/docs/source/en/examples/async_agent.md
@@ -39,7 +39,7 @@ from starlette.routing import Route
 from smolagents import CodeAgent, InferenceClientModel
 
 agent = CodeAgent(
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
     tools=[],
 )
diff --git a/docs/source/en/examples/multiagents.md b/docs/source/en/examples/multiagents.md
index 6bfee3ab1..ceaa02497 100644
--- a/docs/source/en/examples/multiagents.md
+++ b/docs/source/en/examples/multiagents.md
@@ -36,13 +36,13 @@ from huggingface_hub import login
 login()
 ```
 
-⚡️ Our agent will be powered by [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) using `InferenceClientModel` class that uses HF's Inference API: the Inference API allows to quickly and easily run any OS model.
+⚡️ Our agent will be powered by [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking) using the `InferenceClientModel` class, which uses HF's Inference API: the Inference API lets you quickly and easily run any open-source model.
 
 > [!TIP]
 > Inference Providers give access to hundreds of models, powered by serverless inference partners. A list of supported providers can be found [here](https://huggingface.co/docs/inference-providers/index).
 
 ```py
-model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+model_id = "Qwen/Qwen3-Next-80B-A3B-Thinking"
 ```
 
 ## 🔍 Create a web search tool
diff --git a/docs/source/en/examples/rag.md b/docs/source/en/examples/rag.md
index c7a7fc2ef..cc056608c 100644
--- a/docs/source/en/examples/rag.md
+++ b/docs/source/en/examples/rag.md
@@ -156,7 +156,7 @@ from smolagents import InferenceClientModel, CodeAgent
 # Initialize the agent with our retriever tool
 agent = CodeAgent(
     tools=[retriever_tool],  # List of tools available to the agent
-    model=InferenceClientModel(),  # Default model "Qwen/Qwen2.5-Coder-32B-Instruct"
+    model=InferenceClientModel(),  # Default model "Qwen/Qwen3-Next-80B-A3B-Thinking"
     max_steps=4,  # Limit the number of reasoning steps
     verbosity_level=2,  # Show detailed agent reasoning
 )
diff --git a/docs/source/en/examples/text_to_sql.md b/docs/source/en/examples/text_to_sql.md
index 130fe8963..39aaf4cc6 100644
--- a/docs/source/en/examples/text_to_sql.md
+++ b/docs/source/en/examples/text_to_sql.md
@@ -175,14 +175,14 @@ for table in ["receipts", "waiters"]:
 print(updated_description)
 ```
 
-Since this request is a bit harder than the previous one, we’ll switch the LLM engine to use the more powerful [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)!
+Since this request is a bit harder than the previous one, we’ll switch the LLM engine to use the more powerful [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking)!
 
 ```py
 sql_engine.description = updated_description
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
 )
 
 agent.run("Which waiter got more total money from tips?")
diff --git a/docs/source/en/tutorials/tools.md b/docs/source/en/tutorials/tools.md
index a07297799..b4a73c190 100644
--- a/docs/source/en/tutorials/tools.md
+++ b/docs/source/en/tutorials/tools.md
@@ -275,7 +275,7 @@ Then you can use this tool just like any other tool.
 For example, let's improve
 ```python
 from smolagents import CodeAgent, InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 agent = CodeAgent(tools=[image_generation_tool], model=model)
 
 agent.run(
@@ -323,7 +323,7 @@ Let's add the `model_download_tool` to an existing agent initialized with only t
 ```python
 from smolagents import InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 
 agent.tools[model_download_tool.name] = model_download_tool
diff --git a/docs/source/hi/examples/multiagents.md b/docs/source/hi/examples/multiagents.md
index 2599e59f7..10a628073 100644
--- a/docs/source/hi/examples/multiagents.md
+++ b/docs/source/hi/examples/multiagents.md
@@ -39,12 +39,12 @@ from huggingface_hub import login
 login()
 ```
 
-⚡️ हमारा एजेंट [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) द्वारा संचालित होगा जो `InferenceClientModel` क्लास का उपयोग करता है जो HF के Inference API का उपयोग करता है: Inference API किसी भी OS मॉडल को जल्दी और आसानी से चलाने की अनुमति देता है।
+⚡️ हमारा एजेंट [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking) द्वारा संचालित होगा जो `InferenceClientModel` क्लास का उपयोग करता है जो HF के Inference API का उपयोग करता है: Inference API किसी भी OS मॉडल को जल्दी और आसानी से चलाने की अनुमति देता है।
 
 _नोट:_ The Inference API विभिन्न मानदंडों के आधार पर मॉडल होस्ट करता है, और डिप्लॉय किए गए मॉडल बिना पूर्व सूचना के अपडेट या बदले जा सकते हैं। इसके बारे में अधिक जानें [यहां](https://huggingface.co/docs/api-inference/supported-models)।
 
 ```py
-model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+model_id = "Qwen/Qwen3-Next-80B-A3B-Thinking"
 ```
 
 ## 🔍 एक वेब सर्च टूल बनाएं
diff --git a/docs/source/hi/examples/text_to_sql.md b/docs/source/hi/examples/text_to_sql.md
index 69fc9820c..c4e28f039 100644
--- a/docs/source/hi/examples/text_to_sql.md
+++ b/docs/source/hi/examples/text_to_sql.md
@@ -166,14 +166,14 @@ for table in ["receipts", "waiters"]:
 print(updated_description)
 ```
 
-चूंकि यह रिक्वेस्ट पिछले वाले से थोड़ी कठिन है, हम LLM इंजन को अधिक शक्तिशाली [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) का उपयोग करने के लिए स्विच करेंगे!
+चूंकि यह रिक्वेस्ट पिछले वाले से थोड़ी कठिन है, हम LLM इंजन को अधिक शक्तिशाली [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking) का उपयोग करने के लिए स्विच करेंगे!
 
 ```py
 sql_engine.description = updated_description
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
 )
 
 agent.run("Which waiter got more total money from tips?")
diff --git a/docs/source/hi/tutorials/tools.md b/docs/source/hi/tutorials/tools.md
index 6241bd56e..7d0a2de95 100644
--- a/docs/source/hi/tutorials/tools.md
+++ b/docs/source/hi/tutorials/tools.md
@@ -121,7 +121,7 @@ image_generation_tool("A sunny beach")
 ```python
 from smolagents import CodeAgent, InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 agent = CodeAgent(tools=[image_generation_tool], model=model)
 
 agent.run(
@@ -169,7 +169,7 @@ agent.run("How many more blocks (also denoted as layers) are in BERT base encode
 ```python
 from smolagents import InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 
 agent.tools[model_download_tool.name] = model_download_tool
diff --git a/docs/source/ko/examples/async_agent.md b/docs/source/ko/examples/async_agent.md
index c8f459e7d..bf402ba8e 100644
--- a/docs/source/ko/examples/async_agent.md
+++ b/docs/source/ko/examples/async_agent.md
@@ -39,7 +39,7 @@ from starlette.routing import Route
 from smolagents import CodeAgent, InferenceClientModel
 
 agent = CodeAgent(
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
     tools=[],
 )
diff --git a/docs/source/ko/examples/multiagents.md b/docs/source/ko/examples/multiagents.md
index 82452871d..373fdba26 100644
--- a/docs/source/ko/examples/multiagents.md
+++ b/docs/source/ko/examples/multiagents.md
@@ -36,13 +36,13 @@ from huggingface_hub import login
 login()
 ```
 
-⚡️ 에이전트는 Hugging Face의 Inference API를 사용하는 `InferenceClientModel` 클래스를 통해 [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)로 구동됩니다. Inference API를 사용하면 모든 오픈소스 모델을 빠르고 쉽게 실행할 수 있습니다.
+⚡️ 에이전트는 Hugging Face의 Inference API를 사용하는 `InferenceClientModel` 클래스를 통해 [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking)로 구동됩니다. Inference API를 사용하면 모든 오픈소스 모델을 빠르고 쉽게 실행할 수 있습니다.
 
 > [!TIP]
 > Inference Providers는 서버리스 추론 파트너가 지원하는 수백 개의 모델에 대한 액세스를 제공합니다. 지원되는 프로바이더 목록은 [여기](https://huggingface.co/docs/inference-providers/index)에서 확인할 수 있습니다.
 
 ```py
-model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+model_id = "Qwen/Qwen3-Next-80B-A3B-Thinking"
 ```
 
 ## 🔍 웹 검색 도구 생성
diff --git a/docs/source/ko/examples/text_to_sql.md b/docs/source/ko/examples/text_to_sql.md
index a8ccd2aa5..d06a0b1ce 100644
--- a/docs/source/ko/examples/text_to_sql.md
+++ b/docs/source/ko/examples/text_to_sql.md
@@ -176,14 +176,14 @@ for table in ["receipts", "waiters"]:
 print(updated_description)
 ```
 
-이번 요청은 이전보다 조금 더 어려우므로, 더 강력한 [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) 모델을 사용하도록 LLM 엔진을 바꾸겠습니다!
+이번 요청은 이전보다 조금 더 어려우므로, 더 강력한 [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking) 모델을 사용하도록 LLM 엔진을 바꾸겠습니다!
 
 ```py
 sql_engine.description = updated_description
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
 )
 
 agent.run("Which waiter got more total money from tips?")
diff --git a/docs/source/zh/examples/multiagents.md b/docs/source/zh/examples/multiagents.md
index ad6d2d94e..d756d052b 100644
--- a/docs/source/zh/examples/multiagents.md
+++ b/docs/source/zh/examples/multiagents.md
@@ -39,12 +39,12 @@ login()
 
 ⚡️ HF的Inference API 可以快速轻松地运行任何开源模型，因此我们的agent将使用HF的Inference API 中的`InferenceClientModel`类来调用
-[Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)模型。
+[Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking)模型。
 
 _Note:_ 基于多参数和部署模型的 Inference API 可能在没有预先通知的情况下更新或替换模型。了解更多信息，请参阅[这里](https://huggingface.co/docs/api-inference/supported-models)。
 
 ```py
-model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
+model_id = "Qwen/Qwen3-Next-80B-A3B-Thinking"
 ```
 
 ## 🔍 创建网络搜索工具
diff --git a/docs/source/zh/examples/text_to_sql.md b/docs/source/zh/examples/text_to_sql.md
index 349d31f6f..65f8547e5 100644
--- a/docs/source/zh/examples/text_to_sql.md
+++ b/docs/source/zh/examples/text_to_sql.md
@@ -162,14 +162,14 @@ for table in ["receipts", "waiters"]:
 print(updated_description)
 ```
 
-因为这个request 比之前的要难一些，我们将 LLM 引擎切换到更强大的 [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)!
+因为这个request 比之前的要难一些，我们将 LLM 引擎切换到更强大的 [Qwen/Qwen3-Next-80B-A3B-Thinking](https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Thinking)!
 
 ```py
 sql_engine.description = updated_description
 
 agent = CodeAgent(
     tools=[sql_engine],
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
 )
 
 agent.run("Which waiter got more total money from tips?")
diff --git a/docs/source/zh/tutorials/tools.md b/docs/source/zh/tutorials/tools.md
index d9f58dfe3..e13f9a029 100644
--- a/docs/source/zh/tutorials/tools.md
+++ b/docs/source/zh/tutorials/tools.md
@@ -120,7 +120,7 @@ image_generation_tool("A sunny beach")
 ```python
 from smolagents import CodeAgent, InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 agent = CodeAgent(tools=[image_generation_tool], model=model)
 
 agent.run(
@@ -168,7 +168,7 @@ agent.run("How many more blocks (also denoted as layers) are in BERT base encode
 ```python
 from smolagents import InferenceClientModel
 
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 
 agent.tools[model_download_tool.name] = model_download_tool
diff --git a/examples/async_agent/main.py b/examples/async_agent/main.py
index b07f86867..ac6e5026d 100644
--- a/examples/async_agent/main.py
+++ b/examples/async_agent/main.py
@@ -18,7 +18,7 @@
 def get_agent():
     # You can set custom model, or tools as needed
     return CodeAgent(
-        model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+        model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
         tools=[],
     )
diff --git a/examples/rag.py b/examples/rag.py
index 18bce3fbc..433ecfe96 100644
--- a/examples/rag.py
+++ b/examples/rag.py
@@ -58,7 +58,7 @@ def forward(self, query: str) -> str:
 retriever_tool = RetrieverTool(docs_processed)
 agent = CodeAgent(
     tools=[retriever_tool],
-    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
     max_steps=4,
     verbosity_level=2,
     stream_outputs=True,
diff --git a/examples/rag_using_chromadb.py b/examples/rag_using_chromadb.py
index aabb57118..42b57caca 100644
--- a/examples/rag_using_chromadb.py
+++ b/examples/rag_using_chromadb.py
@@ -98,7 +98,7 @@ def forward(self, query: str) -> str:
 # Choose which LLM engine to use!
 
 # from smolagents import InferenceClientModel
-# model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Instruct")
+# model = InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking")
 
 # from smolagents import TransformersModel
 # model = TransformersModel(model_id="Qwen/Qwen3-4B-Instruct-2507")
diff --git a/examples/server/README.md b/examples/server/README.md
index d0135d6d3..35de80ce7 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -1,6 +1,6 @@
 # Smolagents Chat Server Demo
 
-This is a simple web server that provides a chat interface for interacting with an AI code agent powered by `smolagents` and the Qwen2.5-Coder-32B-Instruct model, enhanced with MCP (Model Control Protocol) tools.
+This is a simple web server that provides a chat interface for interacting with an AI code agent powered by `smolagents` and the Qwen3-Next-80B-A3B-Thinking model, enhanced with MCP (Model Context Protocol) tools.
 
 ## Features
 
@@ -60,7 +60,7 @@ mcp_client = MCPClient(server_parameters=mcp_server_parameters)
 2. CodeAgent with MCP Tools:
 ```python
 agent = CodeAgent(
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
     tools=mcp_client.get_tools(),
 )
 ```
diff --git a/examples/server/main.py b/examples/server/main.py
index 00ad0410d..f2425d7ac 100644
--- a/examples/server/main.py
+++ b/examples/server/main.py
@@ -15,7 +15,7 @@
 
 # Create a CodeAgent with a specific model and the tools from the MCP client
 agent = CodeAgent(
-    model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    model=InferenceClientModel(model_id="Qwen/Qwen3-Next-80B-A3B-Thinking"),
     tools=mcp_client.get_tools(),
 )
diff --git a/src/smolagents/cli.py b/src/smolagents/cli.py
index ccb8295ef..7403bc462 100644
--- a/src/smolagents/cli.py
+++ b/src/smolagents/cli.py
@@ -44,7 +44,7 @@ def parse_arguments():
     parser.add_argument(
         "--model-id",
         type=str,
-        default="Qwen/Qwen2.5-Coder-32B-Instruct",
+        default="Qwen/Qwen3-Next-80B-A3B-Thinking",
         help="The model ID to use for the specified model type",
     )
     parser.add_argument(
diff --git a/src/smolagents/models.py b/src/smolagents/models.py
index 5d482afbc..df9ebd2de 100644
--- a/src/smolagents/models.py
+++ b/src/smolagents/models.py
@@ -801,7 +801,7 @@ class TransformersModel(Model):
     Parameters:
         model_id (`str`):
             The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
-            For example, `"Qwen/Qwen2.5-Coder-32B-Instruct"`.
+            For example, `"Qwen/Qwen3-Next-80B-A3B-Thinking"`.
         device_map (`str`, *optional*):
             The device_map to initialize your model with.
         torch_dtype (`str`, *optional*):
@@ -823,7 +823,7 @@ class TransformersModel(Model):
     Example:
     ```python
     >>> engine = TransformersModel(
-    ...     model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+    ...     model_id="Qwen/Qwen3-Next-80B-A3B-Thinking",
     ...     device="cuda",
     ...     max_new_tokens=5000,
     ... )
@@ -1387,10 +1387,10 @@ class InferenceClientModel(ApiModel):
     Providers include Cerebras, Cohere, Fal, Fireworks, HF-Inference, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, and more.
 
     Parameters:
-        model_id (`str`, *optional*, default `"Qwen/Qwen2.5-Coder-32B-Instruct"`):
+        model_id (`str`, *optional*, default `"Qwen/Qwen3-Next-80B-A3B-Thinking"`):
            The Hugging Face model ID to be used for inference.
            This can be a model identifier from the Hugging Face model hub or a URL to a deployed Inference Endpoint.
-            Currently, it defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`, but this may change in the future.
+            Currently, it defaults to `"Qwen/Qwen3-Next-80B-A3B-Thinking"`, but this may change in the future.
         provider (`str`, *optional*):
             Name of the provider to use for inference. A list of supported providers can be found in the [Inference Providers documentation](https://huggingface.co/docs/inference-providers/index#partners).
             Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order [here](https://hf.co/settings/inference-providers).
@@ -1439,7 +1439,7 @@ class InferenceClientModel(ApiModel):
 
     def __init__(
         self,
-        model_id: str = "Qwen/Qwen3-Next-80B-A3B-Instruct",
+        model_id: str = "Qwen/Qwen3-Next-80B-A3B-Thinking",
         provider: str | None = None,
         token: str | None = None,
         timeout: int = 120,
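
Below is a minimal smoke-test sketch for the rewiring above; it is not part of the patch. It assumes `smolagents` is installed from this branch and that a Hugging Face token with Inference Providers access is available (e.g. via the `HF_TOKEN` environment variable). With the `models.py` and `cli.py` hunks applied, a bare `InferenceClientModel()` should resolve to the new default; the task string is an arbitrary example.

```python
# Smoke test for the new default model (a sketch, not part of the diff).
# Assumes: smolagents installed from this branch, HF_TOKEN set in the environment.
from smolagents import CodeAgent, InferenceClientModel

# No model_id given: per the models.py hunk above, this should now default to
# "Qwen/Qwen3-Next-80B-A3B-Thinking".
model = InferenceClientModel()
print(model.model_id)  # expected: Qwen/Qwen3-Next-80B-A3B-Thinking

# Run a trivial task through a CodeAgent with the default toolbox to confirm
# the thinking model round-trips through the Inference Providers API.
agent = CodeAgent(tools=[], model=model, add_base_tools=True)
agent.run("What is the 20th Fibonacci number?")
```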