Feature/modelscope support #898
base: master
Changes from 5 commits
@@ -26,17 +26,41 @@ def check_llm_connection(
     # Create an OpenAI client and send the request to the API
     base_url = normalize_base_url(base_url)
     api_key = api_key.strip()
-    response = openai.OpenAI(
-        base_url=base_url, api_key=api_key, timeout=60
-    ).chat.completions.create(
-        model=model,
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": 'Just respond with "Hello"!'},
-        ],
-        timeout=30,
-    )
-    return True, response.choices[0].message.content
+    # Check whether it is the ModelScope platform, as some models require the stream and enable_thinking parameter
+    if "modelscope" in base_url:
+        extra_body = {"enable_thinking": True}
+        response_stream = openai.OpenAI(
+            base_url=base_url, api_key=api_key, timeout=60
+        ).chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", 'content': 'Just respond with "Hello"!'},
[rest of the hunk is not shown in this view]

Suggested change (consistent double quoting):
-                {"role": "user", 'content': 'Just respond with "Hello"!'},
+                {"role": "user", "content": "Just respond with \"Hello\"!"},
@@ -3,6 +3,7 @@
 import os
 import threading
 from typing import Any, List, Optional
+from types import SimpleNamespace
 from urllib.parse import urlparse, urlunparse

 import openai

@@ -135,13 +136,37 @@ def call_llm(
         ValueError: If response is invalid (empty choices or content)
     """
     client = get_llm_client()
+    # Check whether it is the ModelScope platform, as some models require the stream and enable_thinking parameters
+    if "modelscope" in str(client.base_url):
+        logger.info("Detected ModelScope API, using stream mode with enable_thinking=True.")
+        extra_body = {"enable_thinking": True}
+        response_stream = client.chat.completions.create(
+            model=model,
+            messages=messages,  # pyright: ignore[reportArgumentType]
+            temperature=temperature,
+            stream=True,
+            extra_body=extra_body,
+            **kwargs,
+        )
+        full_content = ""
+        for chunk in response_stream:
+            if chunk.choices and chunk.choices[0].delta.content:
+                full_content += chunk.choices[0].delta.content
+        if not full_content:
+            raise ValueError("ModelScope streaming response yielded no content")
+        fake_message = SimpleNamespace(content=full_content)
+        fake_choice = SimpleNamespace(message=fake_message)
+        response = SimpleNamespace(choices=[fake_choice])
+    else:
+        response = client.chat.completions.create(
+            model=model,
+            messages=messages,  # pyright: ignore[reportArgumentType]
+            temperature=temperature,
+            **kwargs,
+        )
Suggested change (build a more complete response-like object for downstream consumers):
-        fake_message = SimpleNamespace(content=full_content)
-        fake_choice = SimpleNamespace(message=fake_message)
-        response = SimpleNamespace(choices=[fake_choice])
+        import time
+        fake_message = SimpleNamespace(content=full_content)
+        fake_choice = SimpleNamespace(message=fake_message, finish_reason="stop", index=0)
+        response = SimpleNamespace(
+            id="chatcmpl-fake-modelscope",
+            object="chat.completion",
+            created=int(time.time()),
+            model=model,
+            choices=[fake_choice],
+        )
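If full type compatibility with non-streaming callers is the goal, another option is to build a real ChatCompletion from the openai SDK's own pydantic models instead of a SimpleNamespace shim. A hedged sketch: import paths and field names follow openai>=1.x, the exact required fields may vary by SDK version, and the helper name is illustrative:

import time
from openai.types.chat import ChatCompletion, ChatCompletionMessage
from openai.types.chat.chat_completion import Choice

def wrap_streamed_content(full_content: str, model: str) -> ChatCompletion:
    # Package the aggregated streamed text in the regular non-streaming shape, so
    # downstream code reading response.choices[0].message.content (and any metadata
    # fields) needs no special-casing for ModelScope.
    return ChatCompletion(
        id="chatcmpl-modelscope-stream",  # synthetic id, as in the suggestion above
        object="chat.completion",
        created=int(time.time()),
        model=model,
        choices=[
            Choice(
                index=0,
                finish_reason="stop",
                message=ChatCompletionMessage(role="assistant", content=full_content),
            )
        ],
    )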
@@ -353,6 +353,14 @@ def __createLLMServiceCards(self):
                 "default_base": "https://open.bigmodel.cn/api/paas/v4",
                 "default_models": ["glm-4-plus", "glm-4-air-250414", "glm-4-flash"],
             },
+            LLMServiceEnum.MODELSCOPE: {
+                "prefix": "modelscope",
+                "api_key_cfg": cfg.modelscope_api_key,
+                "api_base_cfg": cfg.modelscope_api_base,
+                "model_cfg": cfg.modelscope_model,
+                "default_base": "https://api-inference.modelscope.cn/v1",
+                "default_models": ["Qwen/Qwen3-8B", "Qwen/Qwen3-30B-A3B-Instruct-2507", "deepseek-ai/DeepSeek-V3.1"],
+            },

Suggested change:
-                "default_models": ["Qwen/Qwen3-8B", "Qwen/Qwen3-30B-A3B-Instruct-2507", "deepseek-ai/DeepSeek-V3.1"],
+                "default_models": ["Qwen/Qwen3-8B", "Qwen/Qwen2.5-32B-Instruct", "deepseek-ai/DeepSeek-V3.1"],
Copilot AI · Nov 28, 2025
The model name "deepseek-ai/DeepSeek-V3.1" may be incorrect. Based on DeepSeek's release history and the documentation in this repository (which references "deepseek-ai/DeepSeek-V3"), the current latest version is V3, not V3.1.
Please verify this model exists in ModelScope's registry. If it doesn't exist, consider using "deepseek-ai/DeepSeek-V3" instead, which is the verified model name used in the documentation.
| "default_models": ["Qwen/Qwen3-8B", "Qwen/Qwen3-30B-A3B-Instruct-2507", "deepseek-ai/DeepSeek-V3.1"], | |
| "default_models": ["Qwen/Qwen3-8B", "Qwen/Qwen3-30B-A3B-Instruct-2507", "deepseek-ai/DeepSeek-V3"], |
The ModelScope detection logic relies on a simple substring check of "modelscope" in the base_url. This approach is fragile and could match unintended URLs (e.g., "example.com/modelscope-test"). Consider using a more robust detection mechanism, such as checking against the configured ModelScope base URL from the config, or using the LLMServiceEnum to properly identify the service type.
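A sketch of the kind of stricter check this comment is asking for, parsing the hostname instead of substring-matching the whole URL; the helper name and the accepted host list are illustrative assumptions:

from urllib.parse import urlparse

# Treat a base URL as ModelScope only if its hostname is (or is under) a known
# ModelScope API host, rather than matching "modelscope" anywhere in the string.
_MODELSCOPE_HOSTS = {"api-inference.modelscope.cn"}

def is_modelscope_base_url(base_url: str) -> bool:
    host = urlparse(str(base_url)).hostname or ""
    return host in _MODELSCOPE_HOSTS or host.endswith(".modelscope.cn")

# is_modelscope_base_url("https://api-inference.modelscope.cn/v1")  -> True
# is_modelscope_base_url("https://example.com/modelscope-test")     -> False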