openvinotoolkit · Wovchena · Jan 8, 2026 · Dec 19, 2025 · Dec 19, 2025 · Dec 19, 2025
@@ -527,6 +527,12 @@ jobs:
             cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
             timeout: 180
+          - name: 'VLM (MiniCPM-o-2_6)'  # dedicated entry: installs transformers 4.51.3, then runs only the MiniCPM-o-2_6 cases of test_vlm_pipeline.py
+            cmd: |
+              python -m pip install transformers==4.51.3
+              python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6"
+            run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}  # gated solely on the visual_language component reported by smart_ci
+            timeout: 30
           - name: 'GGUF Reader tests'
             cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}

@@ -466,6 +466,12 @@ jobs:
             cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
             timeout: 180
+          - name: 'VLM (MiniCPM-o-2_6)'  # dedicated entry: installs transformers 4.51.3, then runs only the MiniCPM-o-2_6 cases of test_vlm_pipeline.py
+            cmd: |
+              python -m pip install transformers==4.51.3
+              python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6"
+            run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}  # gated solely on the visual_language component reported by smart_ci
+            timeout: 30
           - name: 'GGUF Reader tests'
             cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}

@@ -614,6 +614,12 @@ jobs:
             cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
             timeout: 180
+          - name: 'VLM (MiniCPM-o-2_6)'  # dedicated entry: installs transformers 4.51.3, then runs only the MiniCPM-o-2_6 cases of test_vlm_pipeline.py
+            cmd: |
+              python -m pip install transformers==4.51.3
+              python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6"
+            run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}  # gated solely on the visual_language component reported by smart_ci
+            timeout: 30
           - name: 'GGUF Reader tests'
             cmd: 'python -m pytest -s -v tests/python_tests/test_gguf_reader.py'
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}

@@ -30,3 +30,6 @@ torchcodec==0.7.0; sys_platform == "linux"
 rouge==1.0.1
 # - microsoft/Phi-4-multimodal-instruct
 peft==0.17.1
+# - openbmb/MiniCPM-o-2_6
+vocos==0.1.0
+torchaudio==2.8.0
@@ -42,6 +42,7 @@
 import numpy as np
 import transformers
 from optimum.intel.openvino import OVModelForVisualCausalLM
+from optimum.utils.import_utils import is_transformers_version
 from openvino_genai import (
     VLMPipeline,
     GenerationConfig,
@@ -97,6 +98,7 @@ class VlmModelInfo:
     "katuni4ka/tiny-random-internvl2",
     "katuni4ka/tiny-random-gemma3",
     "qnguyen3/nanoLLaVA",
+    "rkazants/tiny-random-MiniCPM-o-2_6",
     *VIDEO_MODEL_IDS,
 ]
 
@@ -115,6 +117,7 @@ class VlmModelInfo:
     "katuni4ka/tiny-random-gemma3": lambda idx: "<start_of_image>",
     "katuni4ka/tiny-random-internvl2": lambda idx: "<image>\n",
     "katuni4ka/tiny-random-minicpmv-2_6": lambda idx: "<image>./</image>\n",
+    "rkazants/tiny-random-MiniCPM-o-2_6": lambda idx: "<image>./</image>\n",
     "katuni4ka/tiny-random-phi3-vision": lambda idx: f"<|image_{idx + 1}|>\n",
     "katuni4ka/tiny-random-llava-next-video": lambda idx: "<image>\n",
     "qnguyen3/nanoLLaVA": lambda idx: "<image>\n",
@@ -125,6 +128,7 @@ class VlmModelInfo:
     "katuni4ka/tiny-random-gemma3": 32,
     "qnguyen3/nanoLLaVA": 384,
     "katuni4ka/tiny-random-llava-next-video": 336,
+    "rkazants/tiny-random-MiniCPM-o-2_6": 448,
 }
 
 
@@ -184,6 +188,9 @@ def _get_ov_model(model_id: str) -> str:
         pytest.skip("ValueError: The current version of Transformers does not allow for the export of the model. Maximum required is 4.53.3, got: 4.55.4")
     if "katuni4ka/tiny-random-phi3-vision" == model_id:
         pytest.xfail("AttributeError: 'DynamicCache' object has no attribute 'get_usable_length'. Ticket CVS-175110")
+    if "rkazants/tiny-random-MiniCPM-o-2_6" == model_id and is_transformers_version(">", "4.51.3"):
+        pytest.skip("ValueError: The current version of Transformers does not allow for the export of the model. Maximum supported version is 4.51.3")
+
     ov_cache_converted_dir = get_ov_cache_converted_models_dir()
     dir_name = str(model_id).replace(os.sep, "_")
     model_dir = ov_cache_converted_dir / dir_name
@@ -215,6 +222,7 @@ def convert_to_temp(temp_dir: Path) -> None:
                     "katuni4ka/tiny-random-phi3-vision",
                     "katuni4ka/tiny-random-phi-4-multimodal",
                     "qnguyen3/nanoLLaVA",
+                    "rkazants/tiny-random-MiniCPM-o-2_6"
                 },
             )
         )
@@ -384,6 +392,9 @@ def synthetic_video(pytestconfig):
 def synthetic_video_32x32(synthetic_video):
     return resize_video(synthetic_video, (32, 32))
 
+@pytest.fixture(scope="module")
+def cat_image_448x448(cat_image):  # module-scoped fixture built from the `cat_image` fixture it takes as an argument
+    return cat_image.resize((448, 448))  # 448 equals the value mapped to "rkazants/tiny-random-MiniCPM-o-2_6" earlier in this file; presumably that model's expected image size - confirm
 
 @pytest.fixture(scope="module")
 def cat_image_384x384(cat_image):
@@ -1422,6 +1433,8 @@ def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo):
         pytest.param(("katuni4ka/tiny-random-llava-next-video", "PA"), False, True, id="llava-next-video/PA/video"),
         pytest.param(("katuni4ka/tiny-random-llava-next-video", "SDPA"), True, True, id="llava-next-video/SDPA/image+video"),
         pytest.param(("katuni4ka/tiny-random-llava-next-video", "PA"), True, True, id="llava-next-video/PA/image+video"),
+        pytest.param(("rkazants/tiny-random-MiniCPM-o-2_6", "SDPA"), True, False, id="MiniCPM-o-2_6/SDPA/image"),
+        pytest.param(("rkazants/tiny-random-MiniCPM-o-2_6", "PA"), True, False, id="MiniCPM-o-2_6/PA/image")
     ],
     indirect=["ov_pipe_model"],
 )
@@ -1502,13 +1515,7 @@ def get_nanollava_processor():
         # Gemma3 input_ids has two bos tokens when running with optimum: one in chat template + "add_bos_token" is set to True in tokenizer_config.json
         if model.config.model_type == "gemma3":
             processor.tokenizer.add_bos_token = False
-        params = {}
-        if resized_image is not None:
-            params["images"] = [resized_image]
-        if resized_video is not None:
-            params["videos"] = [resized_video]
-        templated_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
-        inputs = processor(text=[templated_prompt], **params, padding=True, return_tensors="pt")
+        inputs = model.preprocess_inputs(text=prompt, image=resized_image, video=resized_video, processor=processor, config=model.config)
 
     max_new_tokens = 100