Commit 4b245f1

add minicpmv4 test case
1 parent 7c64417 commit 4b245f1

4 files changed: +50 −10 lines changed

tests/openvino/test_exporters_cli.py

Lines changed: 11 additions & 0 deletions
@@ -622,6 +622,17 @@ class OVCLIExportTestCase(unittest.TestCase):
                 "resampler_model": {"int8": 6},
             },
         ),
+        (
+            "image-text-to-text",
+            "minicpmv4",
+            "int4 --group-size 4 --ratio 0.8 --trust-remote-code",
+            {
+                "lm_model": {"int8": 10, "int4": 20},
+                "text_embeddings_model": {"int8": 1},
+                "vision_embeddings_model": {"int8": 26},
+                "resampler_model": {"int8": 6},
+            },
+        ),
         (
             "image-text-to-text",
             "minicpmv",

tests/openvino/test_modeling.py

Lines changed: 10 additions & 10 deletions
@@ -2439,7 +2439,7 @@ class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
         SUPPORT_VIDEO.append("llava_next_video")
 
     if is_transformers_version(">=", "4.45.0"):
-        SUPPORTED_ARCHITECTURES += ["minicpmv", "internvl2", "phi3_v", "qwen2_vl"]
+        SUPPORTED_ARCHITECTURES += ["minicpmv", "internvl2", "phi3_v", "qwen2_vl", "minicpmv4"]
         SUPPORT_VIDEO.append("qwen2_vl")
 
     if is_transformers_version(">=", "4.46.0"):
@@ -2454,7 +2454,7 @@ class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
     if is_transformers_version(">=", "4.51"):
         SUPPORTED_ARCHITECTURES += ["llama4"]
     TASK = "image-text-to-text"
-    REMOTE_CODE_MODELS = ["internvl2", "minicpmv", "nanollava", "phi3_v", "maira2", "phi4mm"]
+    REMOTE_CODE_MODELS = ["internvl2", "minicpmv", "nanollava", "phi3_v", "maira2", "phi4mm", "minicpmv4"]
 
     IMAGE = Image.open(
         requests.get(
@@ -2558,8 +2558,8 @@ def test_compare_to_transformers(self, model_arch):
         ov_model.clear_requests()
         self._check_device_and_request(ov_model, test_device, False)
 
-        # pytorch minicpmv and internvl2 are not designed to be used via forward
-        if model_arch not in ["minicpmv", "internvl2"]:
+        # pytorch minicpmv/minicpmv4 and internvl2 are not designed to be used via forward
+        if model_arch not in ["minicpmv", "minicpmv4", "internvl2"]:
             set_seed(SEED)
             ov_outputs = ov_model(**inputs)
             set_seed(SEED)
@@ -2608,8 +2608,8 @@ def test_compare_to_transformers(self, model_arch):
                 **transformers_inputs, generation_config=gen_config, **additional_inputs
             )
 
-        # original minicpmv, internvl always skip input tokens in generation results, while transformers based approach provide them
-        if model_arch in ["minicpmv", "internvl2"]:
+        # original minicpmv/minicpmv4, internvl always skip input tokens in generation results, while transformers based approach provide them
+        if model_arch in ["minicpmv", "minicpmv4", "internvl2"]:
             ov_outputs = ov_outputs[:, inputs["input_ids"].shape[1] :]
         self.assertTrue(
             torch.equal(ov_outputs, transformers_outputs),
@@ -2634,8 +2634,8 @@ def test_compare_to_transformers(self, model_arch):
             inputs = ov_model.preprocess_inputs(**preprocessors, text=question, video=input_video)
             transformers_inputs = copy.deepcopy(inputs)
             ov_outputs = ov_model.generate(**inputs, generation_config=gen_config)
-            # original minicpmv, internvl always skip input tokens in generation results, while transformers based approach provide them
-            if model_arch in ["minicpmv", "internvl2"]:
+            # original minicpmv/minicpmv4, internvl always skip input tokens in generation results, while transformers based approach provide them
+            if model_arch in ["minicpmv", "minicpmv4", "internvl2"]:
                 ov_outputs = ov_outputs[:, inputs["input_ids"].shape[1] :]
             with torch.no_grad():
                 transformers_outputs = transformers_model.generate(
@@ -2652,8 +2652,8 @@ def test_compare_to_transformers(self, model_arch):
             inputs = ov_model.preprocess_inputs(**preprocessors, text=question, audio=[input_audio])
             transformers_inputs = copy.deepcopy(inputs)
             ov_outputs = ov_model.generate(**inputs, generation_config=gen_config)
-            # original minicpmv, internvl always skip input tokens in generation results, while transformers based approach provide them
-            if model_arch in ["minicpmv", "internvl2"]:
+            # original minicpmv/minicpmv4, internvl always skip input tokens in generation results, while transformers based approach provide them
+            if model_arch in ["minicpmv", "minicpmv4", "internvl2"]:
                 ov_outputs = ov_outputs[:, inputs["input_ids"].shape[1] :]
             with torch.no_grad():
                 transformers_outputs = transformers_model.generate(
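The recurring comment is the crux of these edits: minicpmv4 joins minicpmv and internvl2 in the group of remote-code models whose generate() returns only the newly generated tokens, while the transformers-style OpenVINO model also echoes the prompt. A self-contained toy illustration of the slice the test applies:

```python
# Toy tensors standing in for the test's inputs["input_ids"] and outputs.
import torch

prompt = torch.tensor([[101, 102, 103]])            # 3 prompt tokens
ov_outputs = torch.tensor([[101, 102, 103, 7, 9]])  # OV model echoes the prompt
baseline = torch.tensor([[7, 9]])                   # remote-code model skips it

ov_outputs = ov_outputs[:, prompt.shape[1] :]       # same slice as in the test
assert torch.equal(ov_outputs, baseline)
```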

tests/openvino/test_quantization.py

Lines changed: 22 additions & 0 deletions
@@ -982,6 +982,27 @@ class OVWeightCompressionTest(unittest.TestCase):
                 "resampler_model": {"int8": 6},
             },
         ),
+        (
+            OVModelForVisualCausalLM,
+            "minicpmv4",
+            True,
+            dict(
+                bits=4,
+                group_size=16,
+                dataset="contextual",
+                ratio=0.8,
+                sensitivity_metric="mean_activation_magnitude",
+                num_samples=1,
+                processor=MODEL_NAMES["minicpmv4"],
+                trust_remote_code=True,
+            ),
+            {
+                "lm_model": {"int8": 8, "int4": 22},
+                "text_embeddings_model": {"int8": 1},
+                "vision_embeddings_model": {"int8": 26},
+                "resampler_model": {"int8": 6},
+            },
+        ),
         (
             OVModelForVisualCausalLM,
             "internvl2",
@@ -1116,6 +1137,7 @@ class OVWeightCompressionTest(unittest.TestCase):
 
     if is_transformers_version(">=", "4.45.0"):
         SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION.append((OVModelForVisualCausalLM, "minicpmv", True))
+        SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION.append((OVModelForVisualCausalLM, "minicpmv4", True))
         SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION.append((OVModelForVisualCausalLM, "qwen2_vl", False))
 
     SUPPORTED_ARCHITECTURES_WITH_HYBRID_QUANTIZATION = [
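Outside the harness, the dict in the new entry maps one-to-one onto optimum-intel's OVWeightQuantizationConfig. A minimal sketch, assuming the public from_pretrained export path; the checkpoint id is the one behind MODEL_NAMES["minicpmv4"]:

```python
# Data-aware 4-bit weight compression mirroring the new test entry.
from optimum.intel import OVModelForVisualCausalLM, OVWeightQuantizationConfig

config = OVWeightQuantizationConfig(
    bits=4,
    group_size=16,
    dataset="contextual",  # built-in calibration dataset
    ratio=0.8,             # ~80% of LM weights in int4, the rest int8
    sensitivity_metric="mean_activation_magnitude",
    num_samples=1,
    processor="snake7gun/minicpm-v-4-tiny",  # processor used to build calibration inputs
    trust_remote_code=True,
)
model = OVModelForVisualCausalLM.from_pretrained(
    "snake7gun/minicpm-v-4-tiny",
    export=True,
    trust_remote_code=True,
    quantization_config=config,
)
```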

tests/openvino/utils_tests.py

Lines changed: 7 additions & 0 deletions
@@ -114,6 +114,7 @@
     "minicpm": "katuni4ka/tiny-random-minicpm",
     "minicpm3": "katuni4ka/tiny-random-minicpm3",
     "minicpmv": "katuni4ka/tiny-random-minicpmv-2_6",
+    "minicpmv4": "snake7gun/minicpm-v-4-tiny",
     "mistral": "echarlaix/tiny-random-mistral",
     "mistral-nemo": "katuni4ka/tiny-random-mistral-nemo",
     "mixtral": "TitanML/tiny-mixtral",
@@ -282,6 +283,12 @@
         "vision_embeddings_model": 26,
         "resampler_model": 6,
     },
+    "minicpmv4": {
+        "lm_model": 30,
+        "text_embeddings_model": 1,
+        "vision_embeddings_model": 26,
+        "resampler_model": 6,
+    },
     "llava_next_video": {
         "lm_model": 30,
         "text_embeddings_model": 1,
