Skip to content

Commit a1acba8

Browse files
authored
[model] support deepseek-ocr-2 (#7917)
1 parent fb4be50 commit a1acba8

File tree

8 files changed

+134
-40
lines changed

8 files changed

+134
-40
lines changed

docs/source/Instruction/Supported-models-and-datasets.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,9 @@
254254
|[Qwen/Qwen3-Embedding-0.6B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B)|qwen3_emb|qwen3_emb|-|✔|-|[Qwen/Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B)|
255255
|[Qwen/Qwen3-Embedding-4B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-4B)|qwen3_emb|qwen3_emb|-|✔|-|[Qwen/Qwen3-Embedding-4B](https://huggingface.co/Qwen/Qwen3-Embedding-4B)|
256256
|[Qwen/Qwen3-Embedding-8B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-8B)|qwen3_emb|qwen3_emb|-|✔|-|[Qwen/Qwen3-Embedding-8B](https://huggingface.co/Qwen/Qwen3-Embedding-8B)|
257-
|[Qwen/Qwen3-Reranker-0.6B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-0.6B)|qwen3_reranker|qwen3_reranker|-|✘|-|[Qwen/Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B)|
258-
|[Qwen/Qwen3-Reranker-4B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-4B)|qwen3_reranker|qwen3_reranker|-|✘|-|[Qwen/Qwen3-Reranker-4B](https://huggingface.co/Qwen/Qwen3-Reranker-4B)|
259-
|[Qwen/Qwen3-Reranker-8B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-8B)|qwen3_reranker|qwen3_reranker|-|✘|-|[Qwen/Qwen3-Reranker-8B](https://huggingface.co/Qwen/Qwen3-Reranker-8B)|
257+
|[Qwen/Qwen3-Reranker-0.6B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-0.6B)|qwen3_reranker|qwen3_reranker|-|✔|-|[Qwen/Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B)|
258+
|[Qwen/Qwen3-Reranker-4B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-4B)|qwen3_reranker|qwen3_reranker|-|✔|-|[Qwen/Qwen3-Reranker-4B](https://huggingface.co/Qwen/Qwen3-Reranker-4B)|
259+
|[Qwen/Qwen3-Reranker-8B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-8B)|qwen3_reranker|qwen3_reranker|-|✔|-|[Qwen/Qwen3-Reranker-8B](https://huggingface.co/Qwen/Qwen3-Reranker-8B)|
260260
|[iic/gte_Qwen2-1.5B-instruct](https://modelscope.cn/models/iic/gte_Qwen2-1.5B-instruct)|qwen2_gte|dummy|-|✘|-|[Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct)|
261261
|[iic/gte_Qwen2-7B-instruct](https://modelscope.cn/models/iic/gte_Qwen2-7B-instruct)|qwen2_gte|dummy|-|✘|-|[Alibaba-NLP/gte-Qwen2-7B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)|
262262
|[codefuse-ai/CodeFuse-QWen-14B](https://modelscope.cn/models/codefuse-ai/CodeFuse-QWen-14B)|codefuse_qwen|codefuse|-|✘|coding|[codefuse-ai/CodeFuse-QWen-14B](https://huggingface.co/codefuse-ai/CodeFuse-QWen-14B)|
@@ -672,9 +672,9 @@
672672
|[BAAI/bge-reranker-large](https://modelscope.cn/models/BAAI/bge-reranker-large)|bge_reranker|bge_reranker|-|✘|-|[BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large)|
673673
|[allenai/OLMoE-1B-7B-0125](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0125)|olmoe|olmoe|-|✔|-|[allenai/OLMoE-1B-7B-0125](https://huggingface.co/allenai/OLMoE-1B-7B-0125)|
674674
|[allenai/OLMoE-1B-7B-0125-Instruct](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0125-Instruct)|olmoe|olmoe|-|✔|-|[allenai/OLMoE-1B-7B-0125-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0125-Instruct)|
675-
|[allenai/OLMoE-1B-7B-0924](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924)|olmoe_0924|olmoe_0924|-|✔|-|[allenai/OLMoE-1B-7B-0924](https://huggingface.co/allenai/OLMoE-1B-7B-0924)|
676-
|[allenai/OLMoE-1B-7B-0924-Instruct](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924-Instruct)|olmoe_0924|olmoe_0924|-|✔|-|[allenai/OLMoE-1B-7B-0924-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct)|
677-
|[allenai/OLMoE-1B-7B-0924-SFT](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924-SFT)|olmoe_0924|olmoe_0924|-|✔|-|[allenai/OLMoE-1B-7B-0924-SFT](https://huggingface.co/allenai/OLMoE-1B-7B-0924-SFT)|
675+
|[allenai/OLMoE-1B-7B-0924](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924)|olmoe|olmoe_0924|-|✔|-|[allenai/OLMoE-1B-7B-0924](https://huggingface.co/allenai/OLMoE-1B-7B-0924)|
676+
|[allenai/OLMoE-1B-7B-0924-Instruct](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924-Instruct)|olmoe|olmoe_0924|-|✔|-|[allenai/OLMoE-1B-7B-0924-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct)|
677+
|[allenai/OLMoE-1B-7B-0924-SFT](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924-SFT)|olmoe|olmoe_0924|-|✔|-|[allenai/OLMoE-1B-7B-0924-SFT](https://huggingface.co/allenai/OLMoE-1B-7B-0924-SFT)|
678678
|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)|
679679
|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)|
680680
|[iic/gte-modernbert-base](https://modelscope.cn/models/iic/gte-modernbert-base)|modern_bert_gte|dummy|transformers>=4.48|✘|bert, embedding|[Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)|
@@ -768,8 +768,8 @@
768768
|[Qwen/Qwen3-VL-235B-A22B-Thinking-FP8](https://modelscope.cn/models/Qwen/Qwen3-VL-235B-A22B-Thinking-FP8)|qwen3_vl_moe|qwen3_vl|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|✔|vision, video|[Qwen/Qwen3-VL-235B-A22B-Thinking-FP8](https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Thinking-FP8)|
769769
|[Qwen/Qwen3-VL-Embedding-2B](https://modelscope.cn/models/Qwen/Qwen3-VL-Embedding-2B)|qwen3_vl_emb|qwen3_vl_emb|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|✔|vision, video|[Qwen/Qwen3-VL-Embedding-2B](https://huggingface.co/Qwen/Qwen3-VL-Embedding-2B)|
770770
|[Qwen/Qwen3-VL-Embedding-8B](https://modelscope.cn/models/Qwen/Qwen3-VL-Embedding-8B)|qwen3_vl_emb|qwen3_vl_emb|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|✔|vision, video|[Qwen/Qwen3-VL-Embedding-8B](https://huggingface.co/Qwen/Qwen3-VL-Embedding-8B)|
771-
|[Qwen/Qwen3-VL-Reranker-2B](https://modelscope.cn/models/Qwen/Qwen3-VL-Reranker-2B)|qwen3_vl_reranker|qwen3_vl_reranker|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|✘|vision, video|[Qwen/Qwen3-VL-Reranker-2B](https://huggingface.co/Qwen/Qwen3-VL-Reranker-2B)|
772-
|[Qwen/Qwen3-VL-Reranker-8B](https://modelscope.cn/models/Qwen/Qwen3-VL-Reranker-8B)|qwen3_vl_reranker|qwen3_vl_reranker|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|✘|vision, video|[Qwen/Qwen3-VL-Reranker-8B](https://huggingface.co/Qwen/Qwen3-VL-Reranker-8B)|
771+
|[Qwen/Qwen3-VL-Reranker-2B](https://modelscope.cn/models/Qwen/Qwen3-VL-Reranker-2B)|qwen3_vl_reranker|qwen3_vl_reranker|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|✔|vision, video|[Qwen/Qwen3-VL-Reranker-2B](https://huggingface.co/Qwen/Qwen3-VL-Reranker-2B)|
772+
|[Qwen/Qwen3-VL-Reranker-8B](https://modelscope.cn/models/Qwen/Qwen3-VL-Reranker-8B)|qwen3_vl_reranker|qwen3_vl_reranker|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|✔|vision, video|[Qwen/Qwen3-VL-Reranker-8B](https://huggingface.co/Qwen/Qwen3-VL-Reranker-8B)|
773773
|[iic/gme-Qwen2-VL-2B-Instruct](https://modelscope.cn/models/iic/gme-Qwen2-VL-2B-Instruct)|qwen2_gme|qwen2_gme|-|✘|vision|[Alibaba-NLP/gme-Qwen2-VL-2B-Instruct](https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct)|
774774
|[iic/gme-Qwen2-VL-7B-Instruct](https://modelscope.cn/models/iic/gme-Qwen2-VL-7B-Instruct)|qwen2_gme|qwen2_gme|-|✘|vision|[Alibaba-NLP/gme-Qwen2-VL-7B-Instruct](https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct)|
775775
|[AIDC-AI/Ovis1.6-Gemma2-9B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-9B)|ovis1_6|ovis1_6|transformers>=4.42|✘|vision|[AIDC-AI/Ovis1.6-Gemma2-9B](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-9B)|
@@ -990,6 +990,7 @@
990990
|[deepseek-ai/Janus-Pro-1B](https://modelscope.cn/models/deepseek-ai/Janus-Pro-1B)|deepseek_janus_pro|deepseek_janus_pro|-|✘|vision|[deepseek-ai/Janus-Pro-1B](https://huggingface.co/deepseek-ai/Janus-Pro-1B)|
991991
|[deepseek-ai/Janus-Pro-7B](https://modelscope.cn/models/deepseek-ai/Janus-Pro-7B)|deepseek_janus_pro|deepseek_janus_pro|-|✘|vision|[deepseek-ai/Janus-Pro-7B](https://huggingface.co/deepseek-ai/Janus-Pro-7B)|
992992
|[deepseek-ai/DeepSeek-OCR](https://modelscope.cn/models/deepseek-ai/DeepSeek-OCR)|deepseek_ocr|deepseek_ocr|transformers==4.46.3, easydict|✘|vision|[deepseek-ai/DeepSeek-OCR](https://huggingface.co/deepseek-ai/DeepSeek-OCR)|
993+
|[deepseek-ai/DeepSeek-OCR-2](https://modelscope.cn/models/deepseek-ai/DeepSeek-OCR-2)|deepseek_ocr2|deepseek_ocr2|transformers==4.46.3, easydict|✘|vision|[deepseek-ai/DeepSeek-OCR-2](https://huggingface.co/deepseek-ai/DeepSeek-OCR-2)|
993994
|[OpenBMB/MiniCPM-V](https://modelscope.cn/models/OpenBMB/MiniCPM-V)|minicpmv|minicpmv|timm, transformers<4.42|&#x2718;|vision|[openbmb/MiniCPM-V](https://huggingface.co/openbmb/MiniCPM-V)|
994995
|[OpenBMB/MiniCPM-V-2](https://modelscope.cn/models/OpenBMB/MiniCPM-V-2)|minicpmv|minicpmv|timm, transformers<4.42|&#x2718;|vision|[openbmb/MiniCPM-V-2](https://huggingface.co/openbmb/MiniCPM-V-2)|
995996
|[OpenBMB/MiniCPM-Llama3-V-2_5](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5)|minicpmv2_5|minicpmv2_5|timm, transformers>=4.36|&#x2718;|vision|[openbmb/MiniCPM-Llama3-V-2_5](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5)|

docs/source_en/Instruction/Supported-models-and-datasets.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,9 @@ The table below introduces the models integrated with ms-swift:
255255
|[Qwen/Qwen3-Embedding-0.6B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B)|qwen3_emb|qwen3_emb|-|&#x2714;|-|[Qwen/Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B)|
256256
|[Qwen/Qwen3-Embedding-4B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-4B)|qwen3_emb|qwen3_emb|-|&#x2714;|-|[Qwen/Qwen3-Embedding-4B](https://huggingface.co/Qwen/Qwen3-Embedding-4B)|
257257
|[Qwen/Qwen3-Embedding-8B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-8B)|qwen3_emb|qwen3_emb|-|&#x2714;|-|[Qwen/Qwen3-Embedding-8B](https://huggingface.co/Qwen/Qwen3-Embedding-8B)|
258-
|[Qwen/Qwen3-Reranker-0.6B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-0.6B)|qwen3_reranker|qwen3_reranker|-|&#x2718;|-|[Qwen/Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B)|
259-
|[Qwen/Qwen3-Reranker-4B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-4B)|qwen3_reranker|qwen3_reranker|-|&#x2718;|-|[Qwen/Qwen3-Reranker-4B](https://huggingface.co/Qwen/Qwen3-Reranker-4B)|
260-
|[Qwen/Qwen3-Reranker-8B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-8B)|qwen3_reranker|qwen3_reranker|-|&#x2718;|-|[Qwen/Qwen3-Reranker-8B](https://huggingface.co/Qwen/Qwen3-Reranker-8B)|
258+
|[Qwen/Qwen3-Reranker-0.6B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-0.6B)|qwen3_reranker|qwen3_reranker|-|&#x2714;|-|[Qwen/Qwen3-Reranker-0.6B](https://huggingface.co/Qwen/Qwen3-Reranker-0.6B)|
259+
|[Qwen/Qwen3-Reranker-4B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-4B)|qwen3_reranker|qwen3_reranker|-|&#x2714;|-|[Qwen/Qwen3-Reranker-4B](https://huggingface.co/Qwen/Qwen3-Reranker-4B)|
260+
|[Qwen/Qwen3-Reranker-8B](https://modelscope.cn/models/Qwen/Qwen3-Reranker-8B)|qwen3_reranker|qwen3_reranker|-|&#x2714;|-|[Qwen/Qwen3-Reranker-8B](https://huggingface.co/Qwen/Qwen3-Reranker-8B)|
261261
|[iic/gte_Qwen2-1.5B-instruct](https://modelscope.cn/models/iic/gte_Qwen2-1.5B-instruct)|qwen2_gte|dummy|-|&#x2718;|-|[Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct)|
262262
|[iic/gte_Qwen2-7B-instruct](https://modelscope.cn/models/iic/gte_Qwen2-7B-instruct)|qwen2_gte|dummy|-|&#x2718;|-|[Alibaba-NLP/gte-Qwen2-7B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)|
263263
|[codefuse-ai/CodeFuse-QWen-14B](https://modelscope.cn/models/codefuse-ai/CodeFuse-QWen-14B)|codefuse_qwen|codefuse|-|&#x2718;|coding|[codefuse-ai/CodeFuse-QWen-14B](https://huggingface.co/codefuse-ai/CodeFuse-QWen-14B)|
@@ -673,9 +673,9 @@ The table below introduces the models integrated with ms-swift:
673673
|[BAAI/bge-reranker-large](https://modelscope.cn/models/BAAI/bge-reranker-large)|bge_reranker|bge_reranker|-|&#x2718;|-|[BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large)|
674674
|[allenai/OLMoE-1B-7B-0125](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0125)|olmoe|olmoe|-|&#x2714;|-|[allenai/OLMoE-1B-7B-0125](https://huggingface.co/allenai/OLMoE-1B-7B-0125)|
675675
|[allenai/OLMoE-1B-7B-0125-Instruct](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0125-Instruct)|olmoe|olmoe|-|&#x2714;|-|[allenai/OLMoE-1B-7B-0125-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0125-Instruct)|
676-
|[allenai/OLMoE-1B-7B-0924](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924)|olmoe_0924|olmoe_0924|-|&#x2714;|-|[allenai/OLMoE-1B-7B-0924](https://huggingface.co/allenai/OLMoE-1B-7B-0924)|
677-
|[allenai/OLMoE-1B-7B-0924-Instruct](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924-Instruct)|olmoe_0924|olmoe_0924|-|&#x2714;|-|[allenai/OLMoE-1B-7B-0924-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct)|
678-
|[allenai/OLMoE-1B-7B-0924-SFT](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924-SFT)|olmoe_0924|olmoe_0924|-|&#x2714;|-|[allenai/OLMoE-1B-7B-0924-SFT](https://huggingface.co/allenai/OLMoE-1B-7B-0924-SFT)|
676+
|[allenai/OLMoE-1B-7B-0924](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924)|olmoe|olmoe_0924|-|&#x2714;|-|[allenai/OLMoE-1B-7B-0924](https://huggingface.co/allenai/OLMoE-1B-7B-0924)|
677+
|[allenai/OLMoE-1B-7B-0924-Instruct](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924-Instruct)|olmoe|olmoe_0924|-|&#x2714;|-|[allenai/OLMoE-1B-7B-0924-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct)|
678+
|[allenai/OLMoE-1B-7B-0924-SFT](https://modelscope.cn/models/allenai/OLMoE-1B-7B-0924-SFT)|olmoe|olmoe_0924|-|&#x2714;|-|[allenai/OLMoE-1B-7B-0924-SFT](https://huggingface.co/allenai/OLMoE-1B-7B-0924-SFT)|
679679
|[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|&#x2718;|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)|
680680
|[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|&#x2718;|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)|
681681
|[iic/gte-modernbert-base](https://modelscope.cn/models/iic/gte-modernbert-base)|modern_bert_gte|dummy|transformers>=4.48|&#x2718;|bert, embedding|[Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)|
@@ -769,8 +769,8 @@ The table below introduces the models integrated with ms-swift:
769769
|[Qwen/Qwen3-VL-235B-A22B-Thinking-FP8](https://modelscope.cn/models/Qwen/Qwen3-VL-235B-A22B-Thinking-FP8)|qwen3_vl_moe|qwen3_vl|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|&#x2714;|vision, video|[Qwen/Qwen3-VL-235B-A22B-Thinking-FP8](https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Thinking-FP8)|
770770
|[Qwen/Qwen3-VL-Embedding-2B](https://modelscope.cn/models/Qwen/Qwen3-VL-Embedding-2B)|qwen3_vl_emb|qwen3_vl_emb|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|&#x2714;|vision, video|[Qwen/Qwen3-VL-Embedding-2B](https://huggingface.co/Qwen/Qwen3-VL-Embedding-2B)|
771771
|[Qwen/Qwen3-VL-Embedding-8B](https://modelscope.cn/models/Qwen/Qwen3-VL-Embedding-8B)|qwen3_vl_emb|qwen3_vl_emb|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|&#x2714;|vision, video|[Qwen/Qwen3-VL-Embedding-8B](https://huggingface.co/Qwen/Qwen3-VL-Embedding-8B)|
772-
|[Qwen/Qwen3-VL-Reranker-2B](https://modelscope.cn/models/Qwen/Qwen3-VL-Reranker-2B)|qwen3_vl_reranker|qwen3_vl_reranker|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|&#x2718;|vision, video|[Qwen/Qwen3-VL-Reranker-2B](https://huggingface.co/Qwen/Qwen3-VL-Reranker-2B)|
773-
|[Qwen/Qwen3-VL-Reranker-8B](https://modelscope.cn/models/Qwen/Qwen3-VL-Reranker-8B)|qwen3_vl_reranker|qwen3_vl_reranker|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|&#x2718;|vision, video|[Qwen/Qwen3-VL-Reranker-8B](https://huggingface.co/Qwen/Qwen3-VL-Reranker-8B)|
772+
|[Qwen/Qwen3-VL-Reranker-2B](https://modelscope.cn/models/Qwen/Qwen3-VL-Reranker-2B)|qwen3_vl_reranker|qwen3_vl_reranker|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|&#x2714;|vision, video|[Qwen/Qwen3-VL-Reranker-2B](https://huggingface.co/Qwen/Qwen3-VL-Reranker-2B)|
773+
|[Qwen/Qwen3-VL-Reranker-8B](https://modelscope.cn/models/Qwen/Qwen3-VL-Reranker-8B)|qwen3_vl_reranker|qwen3_vl_reranker|transformers>=4.57, qwen_vl_utils>=0.0.14, decord|&#x2714;|vision, video|[Qwen/Qwen3-VL-Reranker-8B](https://huggingface.co/Qwen/Qwen3-VL-Reranker-8B)|
774774
|[iic/gme-Qwen2-VL-2B-Instruct](https://modelscope.cn/models/iic/gme-Qwen2-VL-2B-Instruct)|qwen2_gme|qwen2_gme|-|&#x2718;|vision|[Alibaba-NLP/gme-Qwen2-VL-2B-Instruct](https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct)|
775775
|[iic/gme-Qwen2-VL-7B-Instruct](https://modelscope.cn/models/iic/gme-Qwen2-VL-7B-Instruct)|qwen2_gme|qwen2_gme|-|&#x2718;|vision|[Alibaba-NLP/gme-Qwen2-VL-7B-Instruct](https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct)|
776776
|[AIDC-AI/Ovis1.6-Gemma2-9B](https://modelscope.cn/models/AIDC-AI/Ovis1.6-Gemma2-9B)|ovis1_6|ovis1_6|transformers>=4.42|&#x2718;|vision|[AIDC-AI/Ovis1.6-Gemma2-9B](https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-9B)|
@@ -991,6 +991,7 @@ The table below introduces the models integrated with ms-swift:
991991
|[deepseek-ai/Janus-Pro-1B](https://modelscope.cn/models/deepseek-ai/Janus-Pro-1B)|deepseek_janus_pro|deepseek_janus_pro|-|&#x2718;|vision|[deepseek-ai/Janus-Pro-1B](https://huggingface.co/deepseek-ai/Janus-Pro-1B)|
992992
|[deepseek-ai/Janus-Pro-7B](https://modelscope.cn/models/deepseek-ai/Janus-Pro-7B)|deepseek_janus_pro|deepseek_janus_pro|-|&#x2718;|vision|[deepseek-ai/Janus-Pro-7B](https://huggingface.co/deepseek-ai/Janus-Pro-7B)|
993993
|[deepseek-ai/DeepSeek-OCR](https://modelscope.cn/models/deepseek-ai/DeepSeek-OCR)|deepseek_ocr|deepseek_ocr|transformers==4.46.3, easydict|&#x2718;|vision|[deepseek-ai/DeepSeek-OCR](https://huggingface.co/deepseek-ai/DeepSeek-OCR)|
994+
|[deepseek-ai/DeepSeek-OCR-2](https://modelscope.cn/models/deepseek-ai/DeepSeek-OCR-2)|deepseek_ocr2|deepseek_ocr2|transformers==4.46.3, easydict|&#x2718;|vision|[deepseek-ai/DeepSeek-OCR-2](https://huggingface.co/deepseek-ai/DeepSeek-OCR-2)|
994995
|[OpenBMB/MiniCPM-V](https://modelscope.cn/models/OpenBMB/MiniCPM-V)|minicpmv|minicpmv|timm, transformers<4.42|&#x2718;|vision|[openbmb/MiniCPM-V](https://huggingface.co/openbmb/MiniCPM-V)|
995996
|[OpenBMB/MiniCPM-V-2](https://modelscope.cn/models/OpenBMB/MiniCPM-V-2)|minicpmv|minicpmv|timm, transformers<4.42|&#x2718;|vision|[openbmb/MiniCPM-V-2](https://huggingface.co/openbmb/MiniCPM-V-2)|
996997
|[OpenBMB/MiniCPM-Llama3-V-2_5](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5)|minicpmv2_5|minicpmv2_5|timm, transformers>=4.36|&#x2718;|vision|[openbmb/MiniCPM-Llama3-V-2_5](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5)|

swift/model/constant.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ class MLLMModelType:
206206
deepseek_janus = 'deepseek_janus'
207207
deepseek_janus_pro = 'deepseek_janus_pro'
208208
deepseek_ocr = 'deepseek_ocr'
209+
deepseek_ocr2 = 'deepseek_ocr2'
209210

210211
minicpmv = 'minicpmv'
211212
minicpmv2_5 = 'minicpmv2_5'

swift/model/model_arch.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class MLLMModelArch:
5959
deepseek_vl2 = 'deepseek_vl2'
6060
deepseek_janus = 'deepseek_janus'
6161
deepseek_ocr = 'deepseek_ocr'
62+
deepseek_ocr2 = 'deepseek_ocr2'
6263

6364
mplug_owl2 = 'mplug_owl2'
6465
mplug_owl2_1 = 'mplug_owl2_1'
@@ -449,6 +450,14 @@ def register_model_arch(model_arch: ModelKeys, *, exist_ok: bool = False) -> Non
449450
aligner='model.projector',
450451
))
451452

453+
register_model_arch(
454+
MultiModelKeys(
455+
MLLMModelArch.deepseek_ocr2,
456+
language_model='model.layers',
457+
vision_tower=['model.sam_model', 'model.qwen2_model'],
458+
aligner='model.projector',
459+
))
460+
452461
register_model_arch(
453462
MultiModelKeys(
454463
MLLMModelArch.deepseek_vl2,

0 commit comments

Comments
 (0)