Skip to content

Commit 541223c

Browse files
hjh0119 authored and Jintao-Huang committed
[template] update mllm template & InternVL-HF (#5829)
1 parent 53efa65 commit 541223c

File tree

11 files changed

+200
-29
lines changed

11 files changed

+200
-29
lines changed

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -796,6 +796,21 @@
796796
|[OpenGVLab/InternVL3-14B-AWQ](https://modelscope.cn/models/OpenGVLab/InternVL3-14B-AWQ)|internvl3|internvl2_5|transformers>=4.37.2, timm|✘|vision, video|[OpenGVLab/InternVL3-14B-AWQ](https://huggingface.co/OpenGVLab/InternVL3-14B-AWQ)|
797797
|[OpenGVLab/InternVL3-38B-AWQ](https://modelscope.cn/models/OpenGVLab/InternVL3-38B-AWQ)|internvl3|internvl2_5|transformers>=4.37.2, timm|✘|vision, video|[OpenGVLab/InternVL3-38B-AWQ](https://huggingface.co/OpenGVLab/InternVL3-38B-AWQ)|
798798
|[OpenGVLab/InternVL3-78B-AWQ](https://modelscope.cn/models/OpenGVLab/InternVL3-78B-AWQ)|internvl3|internvl2_5|transformers>=4.37.2, timm|✘|vision, video|[OpenGVLab/InternVL3-78B-AWQ](https://huggingface.co/OpenGVLab/InternVL3-78B-AWQ)|
799+
|[OpenGVLab/InternVL3-1B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-1B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3-1B-hf](https://huggingface.co/OpenGVLab/InternVL3-1B-hf)|
800+
|[OpenGVLab/InternVL3-2B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-2B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3-2B-hf](https://huggingface.co/OpenGVLab/InternVL3-2B-hf)|
801+
|[OpenGVLab/InternVL3-8B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-8B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3-8B-hf](https://huggingface.co/OpenGVLab/InternVL3-8B-hf)|
802+
|[OpenGVLab/InternVL3-9B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-9B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3-9B-hf](https://huggingface.co/OpenGVLab/InternVL3-9B-hf)|
803+
|[OpenGVLab/InternVL3-14B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-14B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3-14B-hf](https://huggingface.co/OpenGVLab/InternVL3-14B-hf)|
804+
|[OpenGVLab/InternVL3-38B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-38B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3-38B-hf](https://huggingface.co/OpenGVLab/InternVL3-38B-hf)|
805+
|[OpenGVLab/InternVL3-78B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-78B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3-78B-hf](https://huggingface.co/OpenGVLab/InternVL3-78B-hf)|
806+
|[OpenGVLab/InternVL3_5-1B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-1B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3_5-1B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-1B-HF)|
807+
|[OpenGVLab/InternVL3_5-2B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-2B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3_5-2B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-2B-HF)|
808+
|[OpenGVLab/InternVL3_5-4B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-4B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3_5-4B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-4B-HF)|
809+
|[OpenGVLab/InternVL3_5-8B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-8B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3_5-8B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-8B-HF)|
810+
|[OpenGVLab/InternVL3_5-14B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-14B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3_5-14B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-14B-HF)|
811+
|[OpenGVLab/InternVL3_5-38B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-38B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3_5-38B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-38B-HF)|
812+
|[OpenGVLab/InternVL3_5-30B-A3B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3_5-30B-A3B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B-HF)|
813+
|[OpenGVLab/InternVL3_5-241B-A28B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|✘|vision, video|[OpenGVLab/InternVL3_5-241B-A28B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B-HF)|
799814
|[OpenGVLab/InternVL3_5-1B-Pretrained](https://modelscope.cn/models/OpenGVLab/InternVL3_5-1B-Pretrained)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|✔|vision, video|[OpenGVLab/InternVL3_5-1B-Pretrained](https://huggingface.co/OpenGVLab/InternVL3_5-1B-Pretrained)|
800815
|[OpenGVLab/InternVL3_5-2B-Pretrained](https://modelscope.cn/models/OpenGVLab/InternVL3_5-2B-Pretrained)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|✔|vision, video|[OpenGVLab/InternVL3_5-2B-Pretrained](https://huggingface.co/OpenGVLab/InternVL3_5-2B-Pretrained)|
801816
|[OpenGVLab/InternVL3_5-4B-Pretrained](https://modelscope.cn/models/OpenGVLab/InternVL3_5-4B-Pretrained)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|✔|vision, video|[OpenGVLab/InternVL3_5-4B-Pretrained](https://huggingface.co/OpenGVLab/InternVL3_5-4B-Pretrained)|
@@ -829,6 +844,7 @@
829844
|[OpenGVLab/InternVL3_5-30B-A3B](https://modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|✔|vision, video|[OpenGVLab/InternVL3_5-30B-A3B](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B)|
830845
|[OpenGVLab/InternVL3_5-241B-A28B](https://modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|✔|vision, video|[OpenGVLab/InternVL3_5-241B-A28B](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B)|
831846
|[OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview](https://modelscope.cn/models/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview)|internvl3_5_gpt|internvl3_5_gpt|transformers>=4.37.2, timm|✘|vision, video|[OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview](https://huggingface.co/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview)|
847+
|[OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF)|internvl_gpt_hf|internvl_hf|transformers>=4.55.0, timm|✘|vision, video|[OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF](https://huggingface.co/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF)|
832848
|[Shanghai_AI_Laboratory/Intern-S1-mini](https://modelscope.cn/models/Shanghai_AI_Laboratory/Intern-S1-mini)|interns1|interns1|transformers>=4.55.2|✘|vision, video|[internlm/Intern-S1-mini](https://huggingface.co/internlm/Intern-S1-mini)|
833849
|[Shanghai_AI_Laboratory/Intern-S1](https://modelscope.cn/models/Shanghai_AI_Laboratory/Intern-S1)|interns1|interns1|transformers>=4.55.2|✘|vision, video|[internlm/Intern-S1](https://huggingface.co/internlm/Intern-S1)|
834850
|[Shanghai_AI_Laboratory/Intern-S1-mini-FP8](https://modelscope.cn/models/Shanghai_AI_Laboratory/Intern-S1-mini-FP8)|interns1|interns1|transformers>=4.55.2|✘|vision, video|[internlm/Intern-S1-mini-FP8](https://huggingface.co/internlm/Intern-S1-mini-FP8)|
@@ -905,7 +921,7 @@
905921
|[moonshotai/Kimi-VL-A3B-Thinking](https://modelscope.cn/models/moonshotai/Kimi-VL-A3B-Thinking)|kimi_vl|kimi_vl|transformers<4.49|&#x2718;|-|[moonshotai/Kimi-VL-A3B-Thinking](https://huggingface.co/moonshotai/Kimi-VL-A3B-Thinking)|
906922
|[moonshotai/Kimi-VL-A3B-Thinking-2506](https://modelscope.cn/models/moonshotai/Kimi-VL-A3B-Thinking-2506)|kimi_vl|kimi_vl|transformers<4.49|&#x2718;|-|[moonshotai/Kimi-VL-A3B-Thinking-2506](https://huggingface.co/moonshotai/Kimi-VL-A3B-Thinking-2506)|
907923
|[Kwai-Keye/Keye-VL-8B-Preview](https://modelscope.cn/models/Kwai-Keye/Keye-VL-8B-Preview)|keye_vl|keye_vl|keye_vl_utils|&#x2718;|vision|[Kwai-Keye/Keye-VL-8B-Preview](https://huggingface.co/Kwai-Keye/Keye-VL-8B-Preview)|
908-
|[Kwai-Keye/Keye-VL-1_5-8B](https://modelscope.cn/models/Kwai-Keye/Keye-VL-1_5-8B)|keye_vl_1_5|keye_vl|keye_vl_utils>=1.5.2|&#x2718;|vision|[Kwai-Keye/Keye-VL-1_5-8B](https://huggingface.co/Kwai-Keye/Keye-VL-1_5-8B)|
924+
|[Kwai-Keye/Keye-VL-1_5-8B](https://modelscope.cn/models/Kwai-Keye/Keye-VL-1_5-8B)|keye_vl_1_5|keye_vl_1_5|keye_vl_utils>=1.5.2|&#x2718;|vision|[Kwai-Keye/Keye-VL-1_5-8B](https://huggingface.co/Kwai-Keye/Keye-VL-1_5-8B)|
909925
|[rednote-hilab/dots.ocr](https://modelscope.cn/models/rednote-hilab/dots.ocr)|dots_ocr|dots_ocr|transformers>=4.51.0|&#x2718;|-|[rednote-hilab/dots.ocr](https://huggingface.co/rednote-hilab/dots.ocr)|
910926
|[LLM-Research/Phi-3-vision-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-vision-128k-instruct)|phi3_vision|phi3_vision|transformers>=4.36|&#x2718;|vision|[microsoft/Phi-3-vision-128k-instruct](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct)|
911927
|[LLM-Research/Phi-3.5-vision-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-vision-instruct)|phi3_vision|phi3_vision|transformers>=4.36|&#x2718;|vision|[microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct)|

docs/source_en/Instruction/Supported-models-and-datasets.md

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -796,6 +796,21 @@ The table below introduces the models integrated with ms-swift:
796796
|[OpenGVLab/InternVL3-14B-AWQ](https://modelscope.cn/models/OpenGVLab/InternVL3-14B-AWQ)|internvl3|internvl2_5|transformers>=4.37.2, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-14B-AWQ](https://huggingface.co/OpenGVLab/InternVL3-14B-AWQ)|
797797
|[OpenGVLab/InternVL3-38B-AWQ](https://modelscope.cn/models/OpenGVLab/InternVL3-38B-AWQ)|internvl3|internvl2_5|transformers>=4.37.2, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-38B-AWQ](https://huggingface.co/OpenGVLab/InternVL3-38B-AWQ)|
798798
|[OpenGVLab/InternVL3-78B-AWQ](https://modelscope.cn/models/OpenGVLab/InternVL3-78B-AWQ)|internvl3|internvl2_5|transformers>=4.37.2, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-78B-AWQ](https://huggingface.co/OpenGVLab/InternVL3-78B-AWQ)|
799+
|[OpenGVLab/InternVL3-1B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-1B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-1B-hf](https://huggingface.co/OpenGVLab/InternVL3-1B-hf)|
800+
|[OpenGVLab/InternVL3-2B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-2B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-2B-hf](https://huggingface.co/OpenGVLab/InternVL3-2B-hf)|
801+
|[OpenGVLab/InternVL3-8B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-8B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-8B-hf](https://huggingface.co/OpenGVLab/InternVL3-8B-hf)|
802+
|[OpenGVLab/InternVL3-9B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-9B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-9B-hf](https://huggingface.co/OpenGVLab/InternVL3-9B-hf)|
803+
|[OpenGVLab/InternVL3-14B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-14B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-14B-hf](https://huggingface.co/OpenGVLab/InternVL3-14B-hf)|
804+
|[OpenGVLab/InternVL3-38B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-38B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-38B-hf](https://huggingface.co/OpenGVLab/InternVL3-38B-hf)|
805+
|[OpenGVLab/InternVL3-78B-hf](https://modelscope.cn/models/OpenGVLab/InternVL3-78B-hf)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3-78B-hf](https://huggingface.co/OpenGVLab/InternVL3-78B-hf)|
806+
|[OpenGVLab/InternVL3_5-1B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-1B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-1B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-1B-HF)|
807+
|[OpenGVLab/InternVL3_5-2B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-2B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-2B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-2B-HF)|
808+
|[OpenGVLab/InternVL3_5-4B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-4B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-4B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-4B-HF)|
809+
|[OpenGVLab/InternVL3_5-8B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-8B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-8B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-8B-HF)|
810+
|[OpenGVLab/InternVL3_5-14B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-14B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-14B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-14B-HF)|
811+
|[OpenGVLab/InternVL3_5-38B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-38B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-38B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-38B-HF)|
812+
|[OpenGVLab/InternVL3_5-30B-A3B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-30B-A3B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B-HF)|
813+
|[OpenGVLab/InternVL3_5-241B-A28B-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B-HF)|internvl_hf|internvl_hf|transformers>=4.52.1, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-241B-A28B-HF](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B-HF)|
799814
|[OpenGVLab/InternVL3_5-1B-Pretrained](https://modelscope.cn/models/OpenGVLab/InternVL3_5-1B-Pretrained)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|&#x2714;|vision, video|[OpenGVLab/InternVL3_5-1B-Pretrained](https://huggingface.co/OpenGVLab/InternVL3_5-1B-Pretrained)|
800815
|[OpenGVLab/InternVL3_5-2B-Pretrained](https://modelscope.cn/models/OpenGVLab/InternVL3_5-2B-Pretrained)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|&#x2714;|vision, video|[OpenGVLab/InternVL3_5-2B-Pretrained](https://huggingface.co/OpenGVLab/InternVL3_5-2B-Pretrained)|
801816
|[OpenGVLab/InternVL3_5-4B-Pretrained](https://modelscope.cn/models/OpenGVLab/InternVL3_5-4B-Pretrained)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|&#x2714;|vision, video|[OpenGVLab/InternVL3_5-4B-Pretrained](https://huggingface.co/OpenGVLab/InternVL3_5-4B-Pretrained)|
@@ -829,6 +844,7 @@ The table below introduces the models integrated with ms-swift:
829844
|[OpenGVLab/InternVL3_5-30B-A3B](https://modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|&#x2714;|vision, video|[OpenGVLab/InternVL3_5-30B-A3B](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B)|
830845
|[OpenGVLab/InternVL3_5-241B-A28B](https://modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B)|internvl3_5|internvl3_5|transformers>=4.37.2, timm|&#x2714;|vision, video|[OpenGVLab/InternVL3_5-241B-A28B](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B)|
831846
|[OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview](https://modelscope.cn/models/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview)|internvl3_5_gpt|internvl3_5_gpt|transformers>=4.37.2, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview](https://huggingface.co/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview)|
847+
|[OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF](https://modelscope.cn/models/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF)|internvl_gpt_hf|internvl_hf|transformers>=4.55.0, timm|&#x2718;|vision, video|[OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF](https://huggingface.co/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF)|
832848
|[Shanghai_AI_Laboratory/Intern-S1-mini](https://modelscope.cn/models/Shanghai_AI_Laboratory/Intern-S1-mini)|interns1|interns1|transformers>=4.55.2|&#x2718;|vision, video|[internlm/Intern-S1-mini](https://huggingface.co/internlm/Intern-S1-mini)|
833849
|[Shanghai_AI_Laboratory/Intern-S1](https://modelscope.cn/models/Shanghai_AI_Laboratory/Intern-S1)|interns1|interns1|transformers>=4.55.2|&#x2718;|vision, video|[internlm/Intern-S1](https://huggingface.co/internlm/Intern-S1)|
834850
|[Shanghai_AI_Laboratory/Intern-S1-mini-FP8](https://modelscope.cn/models/Shanghai_AI_Laboratory/Intern-S1-mini-FP8)|interns1|interns1|transformers>=4.55.2|&#x2718;|vision, video|[internlm/Intern-S1-mini-FP8](https://huggingface.co/internlm/Intern-S1-mini-FP8)|
@@ -905,7 +921,7 @@ The table below introduces the models integrated with ms-swift:
905921
|[moonshotai/Kimi-VL-A3B-Thinking](https://modelscope.cn/models/moonshotai/Kimi-VL-A3B-Thinking)|kimi_vl|kimi_vl|transformers<4.49|&#x2718;|-|[moonshotai/Kimi-VL-A3B-Thinking](https://huggingface.co/moonshotai/Kimi-VL-A3B-Thinking)|
906922
|[moonshotai/Kimi-VL-A3B-Thinking-2506](https://modelscope.cn/models/moonshotai/Kimi-VL-A3B-Thinking-2506)|kimi_vl|kimi_vl|transformers<4.49|&#x2718;|-|[moonshotai/Kimi-VL-A3B-Thinking-2506](https://huggingface.co/moonshotai/Kimi-VL-A3B-Thinking-2506)|
907923
|[Kwai-Keye/Keye-VL-8B-Preview](https://modelscope.cn/models/Kwai-Keye/Keye-VL-8B-Preview)|keye_vl|keye_vl|keye_vl_utils|&#x2718;|vision|[Kwai-Keye/Keye-VL-8B-Preview](https://huggingface.co/Kwai-Keye/Keye-VL-8B-Preview)|
908-
|[Kwai-Keye/Keye-VL-1_5-8B](https://modelscope.cn/models/Kwai-Keye/Keye-VL-1_5-8B)|keye_vl_1_5|keye_vl|keye_vl_utils>=1.5.2|&#x2718;|vision|[Kwai-Keye/Keye-VL-1_5-8B](https://huggingface.co/Kwai-Keye/Keye-VL-1_5-8B)|
924+
|[Kwai-Keye/Keye-VL-1_5-8B](https://modelscope.cn/models/Kwai-Keye/Keye-VL-1_5-8B)|keye_vl_1_5|keye_vl_1_5|keye_vl_utils>=1.5.2|&#x2718;|vision|[Kwai-Keye/Keye-VL-1_5-8B](https://huggingface.co/Kwai-Keye/Keye-VL-1_5-8B)|
909925
|[rednote-hilab/dots.ocr](https://modelscope.cn/models/rednote-hilab/dots.ocr)|dots_ocr|dots_ocr|transformers>=4.51.0|&#x2718;|-|[rednote-hilab/dots.ocr](https://huggingface.co/rednote-hilab/dots.ocr)|
910926
|[LLM-Research/Phi-3-vision-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-vision-128k-instruct)|phi3_vision|phi3_vision|transformers>=4.36|&#x2718;|vision|[microsoft/Phi-3-vision-128k-instruct](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct)|
911927
|[LLM-Research/Phi-3.5-vision-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-vision-instruct)|phi3_vision|phi3_vision|transformers>=4.36|&#x2718;|vision|[microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct)|

swift/llm/model/constant.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,8 +185,10 @@ class MLLMModelType:
185185
internvl2_phi3 = 'internvl2_phi3'
186186
internvl2_5 = 'internvl2_5'
187187
internvl3 = 'internvl3'
188+
internvl_hf = 'internvl_hf'
188189
internvl3_5 = 'internvl3_5'
189190
internvl3_5_gpt = 'internvl3_5_gpt'
191+
internvl_gpt_hf = 'internvl_gpt_hf'
190192
interns1 = 'interns1'
191193
xcomposer2 = 'xcomposer2'
192194
xcomposer2_4khd = 'xcomposer2_4khd'

0 commit comments

Comments
 (0)