Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,12 @@ jobs:
cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
timeout: 180
- name: 'VLM (MiniCPM-o-2_6)'
cmd: |
python -m pip install transformers==4.51.3
python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6"
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}
timeout: 30
- name: 'GGUF Reader tests'
cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/manylinux_2_28.yml
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,12 @@ jobs:
cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py ./tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py ./tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
timeout: 180
- name: 'VLM (MiniCPM-o-2_6)'
cmd: |
python -m pip install transformers==4.51.3
python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6"
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}
timeout: 30
- name: 'GGUF Reader tests'
cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,12 @@ jobs:
cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
timeout: 180
- name: 'VLM (MiniCPM-o-2_6)'
cmd: |
python -m pip install transformers==4.51.3
python -m pytest -s -v tests/python_tests/test_vlm_pipeline.py --override-ini cache_dir=/mount/caches/pytest/ -k "MiniCPM-o-2_6"
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test }}
timeout: 30
- name: 'GGUF Reader tests'
cmd: 'python -m pytest -s -v tests/python_tests/test_gguf_reader.py'
run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
Expand Down
3 changes: 3 additions & 0 deletions tests/python_tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,6 @@ torchcodec==0.7.0; sys_platform == "linux"
rouge==1.0.1
# - microsoft/Phi-4-multimodal-instruct
peft==0.17.1
# - openbmb/MiniCPM-o-2_6
vocos==0.1.0
torchaudio==2.8.0
21 changes: 14 additions & 7 deletions tests/python_tests/test_vlm_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import numpy as np
import transformers
from optimum.intel.openvino import OVModelForVisualCausalLM
from optimum.utils.import_utils import is_transformers_version
from openvino_genai import (
VLMPipeline,
GenerationConfig,
Expand Down Expand Up @@ -97,6 +98,7 @@ class VlmModelInfo:
"katuni4ka/tiny-random-internvl2",
"katuni4ka/tiny-random-gemma3",
"qnguyen3/nanoLLaVA",
"rkazants/tiny-random-MiniCPM-o-2_6",
*VIDEO_MODEL_IDS,
]

Expand All @@ -115,6 +117,7 @@ class VlmModelInfo:
"katuni4ka/tiny-random-gemma3": lambda idx: "<start_of_image>",
"katuni4ka/tiny-random-internvl2": lambda idx: "<image>\n",
"katuni4ka/tiny-random-minicpmv-2_6": lambda idx: "<image>./</image>\n",
"rkazants/tiny-random-MiniCPM-o-2_6": lambda idx: "<image>./</image>\n",
"katuni4ka/tiny-random-phi3-vision": lambda idx: f"<|image_{idx + 1}|>\n",
"katuni4ka/tiny-random-llava-next-video": lambda idx: "<image>\n",
"qnguyen3/nanoLLaVA": lambda idx: "<image>\n",
Expand All @@ -125,6 +128,7 @@ class VlmModelInfo:
"katuni4ka/tiny-random-gemma3": 32,
"qnguyen3/nanoLLaVA": 384,
"katuni4ka/tiny-random-llava-next-video": 336,
"rkazants/tiny-random-MiniCPM-o-2_6": 448,
}


Expand Down Expand Up @@ -184,6 +188,9 @@ def _get_ov_model(model_id: str) -> str:
pytest.skip("ValueError: The current version of Transformers does not allow for the export of the model. Maximum required is 4.53.3, got: 4.55.4")
if "katuni4ka/tiny-random-phi3-vision" == model_id:
pytest.xfail("AttributeError: 'DynamicCache' object has no attribute 'get_usable_length'. Ticket CVS-175110")
if "rkazants/tiny-random-MiniCPM-o-2_6" == model_id and is_transformers_version(">", "4.51.3"):
pytest.skip("ValueError: The current version of Transformers does not allow for the export of the model. Maximum supported version is 4.51.3")

ov_cache_converted_dir = get_ov_cache_converted_models_dir()
dir_name = str(model_id).replace(os.sep, "_")
model_dir = ov_cache_converted_dir / dir_name
Expand Down Expand Up @@ -215,6 +222,7 @@ def convert_to_temp(temp_dir: Path) -> None:
"katuni4ka/tiny-random-phi3-vision",
"katuni4ka/tiny-random-phi-4-multimodal",
"qnguyen3/nanoLLaVA",
"rkazants/tiny-random-MiniCPM-o-2_6"
},
)
)
Expand Down Expand Up @@ -384,6 +392,9 @@ def synthetic_video(pytestconfig):
def synthetic_video_32x32(synthetic_video):
return resize_video(synthetic_video, (32, 32))

@pytest.fixture(scope="module")
def cat_image_448x448(cat_image):
    """Return the ``cat_image`` fixture's image resized to 448x448.

    The fixture is module-scoped, so the resize is performed once per
    test module and the result is shared by the tests that request it.
    """
    # NOTE(review): cat_image is presumably a PIL image whose resize()
    # takes a (width, height) tuple and returns a new image - confirm.
    target_size = (448, 448)
    return cat_image.resize(target_size)

@pytest.fixture(scope="module")
def cat_image_384x384(cat_image):
Expand Down Expand Up @@ -1422,6 +1433,8 @@ def test_model_tags_missing_native(ov_pipe_model: VlmModelInfo):
pytest.param(("katuni4ka/tiny-random-llava-next-video", "PA"), False, True, id="llava-next-video/PA/video"),
pytest.param(("katuni4ka/tiny-random-llava-next-video", "SDPA"), True, True, id="llava-next-video/SDPA/image+video"),
pytest.param(("katuni4ka/tiny-random-llava-next-video", "PA"), True, True, id="llava-next-video/PA/image+video"),
pytest.param(("rkazants/tiny-random-MiniCPM-o-2_6", "SDPA"), True, False, id="MiniCPM-o-2_6/SDPA/image"),
pytest.param(("rkazants/tiny-random-MiniCPM-o-2_6", "PA"), True, False, id="MiniCPM-o-2_6/PA/image")
],
indirect=["ov_pipe_model"],
)
Expand Down Expand Up @@ -1502,13 +1515,7 @@ def get_nanollava_processor():
# Gemma3 input_ids contain two BOS tokens when running with optimum: one comes from the chat template and the other is added because "add_bos_token" is set to True in tokenizer_config.json
if model.config.model_type == "gemma3":
processor.tokenizer.add_bos_token = False
params = {}
if resized_image is not None:
params["images"] = [resized_image]
if resized_video is not None:
params["videos"] = [resized_video]
templated_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
inputs = processor(text=[templated_prompt], **params, padding=True, return_tensors="pt")
inputs = model.preprocess_inputs(text=prompt, image=resized_image, video=resized_video, processor=processor, config=model.config)

max_new_tokens = 100

Expand Down