@@ -373,9 +373,12 @@ The table below introduces all models supported by SWIFT:
 |llava1_6-vicuna-7b-instruct|[swift/llava-v1.6-vicuna-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)|
 |llava1_6-vicuna-13b-instruct|[swift/llava-v1.6-vicuna-13b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)|
 |llava1_6-yi-34b-instruct|[swift/llava-v1.6-34b-hf](https://modelscope.cn/models/swift/llava-v1.6-34b-hf/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-yi|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-34b-hf](https://huggingface.co/llava-hf/llava-v1.6-34b-hf)|
-|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llama-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
-|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-qwen-instruct|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
-|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-qwen-instruct|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
+|llama3-llava-next-8b-hf|[swift/llama3-llava-next-8b-hf](https://modelscope.cn/models/swift/llama3-llava-next-8b-hf/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llama-llava-next-hf|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llama3-llava-next-8b-hf](https://huggingface.co/llava-hf/llama3-llava-next-8b-hf)|
+|llava-next-72b-hf|[AI-ModelScope/llava-next-72b-hf](https://modelscope.cn/models/AI-ModelScope/llava-next-72b-hf/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llama-qwen-hf|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-next-72b-hf](https://huggingface.co/llava-hf/llava-next-72b-hf)|
+|llava-next-110b-hf|[AI-ModelScope/llava-next-110b-hf](https://modelscope.cn/models/AI-ModelScope/llava-next-110b-hf/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llama-qwen-hf|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-next-110b-hf](https://huggingface.co/llava-hf/llava-next-110b-hf)|
+|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llama3-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
+|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
+|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
 |llava-next-video-7b-instruct|[swift/LLaVA-NeXT-Video-7B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf)|
 |llava-next-video-7b-32k-instruct|[swift/LLaVA-NeXT-Video-7B-32K-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-32K-hf/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-32K-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-32K-hf)|
 |llava-next-video-7b-dpo-instruct|[swift/LLaVA-NeXT-Video-7B-DPO-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-DPO-hf/summary)|^(language_model\|multi_modal_projector)(?!.*(lm_head\|output\|emb\|wte\|shared)).*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-DPO-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-DPO-hf)|
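A note on the "Default Lora Target Modules" column touched by this diff: each entry is a Python regex matched against the model's named modules, and the negative lookahead keeps LoRA off heads and embeddings. Below is a minimal sketch of how the pattern from the new `*-hf` rows behaves; the candidate module names are illustrative placeholders, not read from a real checkpoint.

```python
import re

# Regex copied from the "Default Lora Target Modules" column above,
# with the markdown pipe escapes (\|) undone.
pattern = re.compile(
    r"^(language_model|multi_modal_projector)"
    r"(?!.*(lm_head|output|emb|wte|shared)).*"
)

# Hypothetical module names for illustration only.
candidates = [
    "language_model.model.layers.0.self_attn.q_proj",  # LoRA target
    "multi_modal_projector.linear_1",                  # LoRA target
    "language_model.lm_head",             # excluded by the lookahead
    "language_model.model.embed_tokens",  # contains 'emb' -> excluded
    "vision_tower.encoder.layers.0.mlp.fc1",  # prefix not listed -> skipped
]

for name in candidates:
    verdict = "LoRA target" if pattern.match(name) else "skipped"
    print(f"{name}: {verdict}")
```

The same structure explains the removed `llama3-llava-next-8b` row's pattern, which targets `model.layers` and `model.mm_projector` instead, matching that checkpoint's non-HF module naming.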