Skip to content

Commit 8414904

Browse files
authored
[Model] Enable native HF format InternVL support (#23742)
Signed-off-by: Isotr0py <[email protected]>
1 parent 3af47c3 commit 8414904

File tree

4 files changed

+18
-16
lines changed

4 files changed

+18
-16
lines changed

docs/models/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
629629
| `Idefics3ForConditionalGeneration` | Idefics3 | T + I | `HuggingFaceM4/Idefics3-8B-Llama3`, etc. | ✅︎ | | ✅︎ |
630630
| `InternS1ForConditionalGeneration` | Intern-S1 | T + I<sup>E+</sup> + V<sup>E+</sup> | `internlm/Intern-S1`, etc. | ✅︎ | ✅︎ | ✅︎ |
631631
| `InternVLChatModel` | InternVL 3.5, InternVL 3.0, InternVideo 2.5, InternVL 2.5, Mono-InternVL, InternVL 2.0 | T + I<sup>E+</sup> + (V<sup>E+</sup>) | `OpenGVLab/InternVL3_5-14B`, `OpenGVLab/InternVL3-9B`, `OpenGVLab/InternVideo2_5_Chat_8B`, `OpenGVLab/InternVL2_5-4B`, `OpenGVLab/Mono-InternVL-2B`, `OpenGVLab/InternVL2-4B`, etc. | ✅︎ | ✅︎ | ✅︎ |
632+
| `InternVLForConditionalGeneration` | InternVL 3.0 (HF format) | T + I<sup>E+</sup> + V<sup>E+</sup> | `OpenGVLab/InternVL3-1B-hf`, etc. | ✅︎ | ✅︎ | ✅︎ |
632633
| `KeyeForConditionalGeneration` | Keye-VL-8B-Preview | T + I<sup>E+</sup> + V<sup>E+</sup> | `Kwai-Keye/Keye-VL-8B-Preview` | | | ✅︎ |
633634
| `KimiVLForConditionalGeneration` | Kimi-VL-A3B-Instruct, Kimi-VL-A3B-Thinking | T + I<sup>+</sup> | `moonshotai/Kimi-VL-A3B-Instruct`, `moonshotai/Kimi-VL-A3B-Thinking` | | ✅︎ | ✅︎ |
634635
| `Llama4ForConditionalGeneration` | Llama 4 | T + I<sup>+</sup> | `meta-llama/Llama-4-Scout-17B-16E-Instruct`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct`, etc. | | ✅︎ | ✅︎ |

tests/models/multimodal/generation/test_common.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -222,21 +222,6 @@
222222
},
223223
marks=[large_gpu_mark(min_gb=32)],
224224
),
225-
# Check "auto" with fallback to transformers
226-
"internvl-transformers": VLMTestInfo(
227-
models=["OpenGVLab/InternVL3-1B-hf"],
228-
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
229-
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
230-
img_idx_to_prompt=lambda idx: "<IMG_CONTEXT>",
231-
max_model_len=4096,
232-
use_tokenizer_eos=True,
233-
image_size_factors=[(0.25, 0.5, 1.0)],
234-
vllm_runner_kwargs={
235-
"model_impl": "auto",
236-
},
237-
auto_cls=AutoModelForImageTextToText,
238-
marks=[pytest.mark.core_model],
239-
),
240225
#### Extended model tests
241226
"aria": VLMTestInfo(
242227
models=["rhymes-ai/Aria"],
@@ -461,6 +446,20 @@
461446
use_tokenizer_eos=True,
462447
patch_hf_runner=model_utils.internvl_patch_hf_runner,
463448
),
449+
"intern_vl-hf": VLMTestInfo(
450+
models=["OpenGVLab/InternVL3-1B-hf"],
451+
test_type=(
452+
VLMTestType.IMAGE,
453+
VLMTestType.MULTI_IMAGE,
454+
VLMTestType.VIDEO,
455+
),
456+
prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
457+
img_idx_to_prompt=lambda idx: "<IMG_CONTEXT>",
458+
video_idx_to_prompt=lambda idx: "<video>",
459+
max_model_len=8192,
460+
use_tokenizer_eos=True,
461+
auto_cls=AutoModelForImageTextToText,
462+
),
464463
"kimi_vl": VLMTestInfo(
465464
models=["moonshotai/Kimi-VL-A3B-Instruct"],
466465
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),

tests/models/registry.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,7 @@ def check_available_online(
429429
"3.5-qwen3moe": "OpenGVLab/InternVL3_5-30B-A3B", # noqa: E501
430430
"3.5-gptoss": "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"}, # noqa: E501
431431
trust_remote_code=True),
432+
"InternVLForConditionalGeneration": _HfExamplesInfo("OpenGVLab/InternVL3-1B-hf"), # noqa: E501
432433
"KeyeForConditionalGeneration": _HfExamplesInfo("Kwai-Keye/Keye-VL-8B-Preview", # noqa: E501
433434
trust_remote_code=True),
434435
"KimiVLForConditionalGeneration": _HfExamplesInfo("moonshotai/Kimi-VL-A3B-Instruct", # noqa: E501
@@ -584,7 +585,7 @@ def check_available_online(
584585
_TRANSFORMERS_BACKEND_MODELS = {
585586
"TransformersModel": _HfExamplesInfo("Qwen/Qwen3-Embedding-0.6B"),
586587
"TransformersForCausalLM": _HfExamplesInfo("hmellor/Ilama-3.2-1B", trust_remote_code=True), # noqa: E501
587-
"TransformersForMultimodalLM": _HfExamplesInfo("OpenGVLab/InternVL3-1B-hf"),
588+
"TransformersForMultimodalLM": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
588589
}
589590

590591
_EXAMPLE_MODELS = {

vllm/model_executor/models/registry.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@
220220
"H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
221221
"InternVLChatModel": ("internvl", "InternVLChatModel"),
222222
"InternS1ForConditionalGeneration": ("interns1", "InternS1ForConditionalGeneration"), # noqa: E501
223+
"InternVLForConditionalGeneration": ("interns1", "InternS1ForConditionalGeneration"), # noqa: E501
223224
"Idefics3ForConditionalGeneration":("idefics3","Idefics3ForConditionalGeneration"),
224225
"SmolVLMForConditionalGeneration": ("smolvlm","SmolVLMForConditionalGeneration"), # noqa: E501
225226
"KeyeForConditionalGeneration": ("keye", "KeyeForConditionalGeneration"),

0 commit comments

Comments (0)