diff --git a/samples/python/visual_language_chat/milebench_eval_vlm.py b/samples/python/visual_language_chat/milebench_eval_vlm.py
index 4d272bb9bc..f64c5abaab 100644
--- a/samples/python/visual_language_chat/milebench_eval_vlm.py
+++ b/samples/python/visual_language_chat/milebench_eval_vlm.py
@@ -153,12 +153,16 @@ def __getitem__(self, idx):
             context += choice_str
 
         img_num = len(ann["task_instance"]["images_path"])
-        qwen2_vl_image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+
+        def idx_to_ov_image_placeholder(idx: int) -> str:
+            return f"<ov_genai_image_{idx}>"
+
         for i in range(img_num):
             rmv_txt = "{image#%d}" % (i + 1)
             rmv_tbl = "{table#%d}" % (i + 1)
-            context = context.replace(rmv_txt, qwen2_vl_image_placeholder)
-            context = context.replace(rmv_tbl, qwen2_vl_image_placeholder)
+            image_placeholder = idx_to_ov_image_placeholder(i)
+            context = context.replace(rmv_txt, image_placeholder)
+            context = context.replace(rmv_tbl, image_placeholder)
 
         task_instruction_id = ann["task_instruction_id"]
         context_str = task_instructions[task_instruction_id] + "\n" + context
diff --git a/src/cpp/src/continuous_batching/pipeline_base.cpp b/src/cpp/src/continuous_batching/pipeline_base.cpp
index 8afcfc6288..2c875dfb87 100644
--- a/src/cpp/src/continuous_batching/pipeline_base.cpp
+++ b/src/cpp/src/continuous_batching/pipeline_base.cpp
@@ -232,7 +232,9 @@ ContinuousBatchingPipeline::IContinuousBatchingPipeline::generate(
     const std::vector<std::vector<ov::Tensor>>& images_vector,
     const std::vector<GenerationConfig>& sampling_params,
     const StreamerVariant& streamer) {
-    return generate(prompts, images_vector, {{}}, sampling_params, streamer);
+    // empty videos batch size should match prompt batch size
+    const std::vector<std::vector<ov::Tensor>> empty_videos_vector(prompts.size());
+    return generate(prompts, images_vector, empty_videos_vector, sampling_params, streamer);
 }
 
 std::vector
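
For reviewers, a minimal standalone sketch of the new placeholder rewriting (a hypothetical usage example, not part of the patch; it assumes openvino.genai's <ov_genai_image_i> tag convention and uses a made-up context string):

# Mirrors the replacement loop added to milebench_eval_vlm.py above.
def idx_to_ov_image_placeholder(idx: int) -> str:
    # openvino.genai resolves <ov_genai_image_i> tags to the i-th image passed
    # to the pipeline, so the eval script no longer hard-codes Qwen2-VL's
    # native <|vision_start|><|image_pad|><|vision_end|> tokens.
    return f"<ov_genai_image_{idx}>"

context = "Compare {image#1} against {table#1} and {image#2}."
for i in range(2):
    placeholder = idx_to_ov_image_placeholder(i)
    context = context.replace("{image#%d}" % (i + 1), placeholder)
    context = context.replace("{table#%d}" % (i + 1), placeholder)

print(context)
# Compare <ov_genai_image_0> against <ov_genai_image_0> and <ov_genai_image_1>.

On the C++ side, the sized empty_videos_vector keeps the per-prompt videos batch aligned with prompts.size(); the previous {{}} literal always produced a videos batch of one, which only matched single-prompt calls.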