29 changes: 27 additions & 2 deletions benchmarks/benchmark_dataset.py
@@ -94,7 +94,14 @@
"""
content = [{"text": prompt, "type": "text"}]
if mm_content is not None:
content.append(mm_content)
# Handle multiple images in mm_content
if "image" in mm_content and isinstance(mm_content["image"], list):
# Multiple images - append each one separately
for img in mm_content["image"]:
content.append(img)
else:
# Single image or other multimodal content
content.append(mm_content)
return [{"role": "user", "content": content}]

def load_data(self) -> None:
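
A quick check of the new branch: when mm_content carries a list under the "image" key, each processed image becomes its own entry in the chat content list instead of one nested entry. A minimal standalone sketch of the same logic (the image dicts are hypothetical placeholders, not real process_image() output; extend() replaces the loop):

# Standalone restatement of the transformation above; the image payloads
# are hypothetical placeholders, not output of process_image().
def to_chat_message(prompt, mm_content=None):
    content = [{"text": prompt, "type": "text"}]
    if mm_content is not None:
        if "image" in mm_content and isinstance(mm_content["image"], list):
            # Multiple images: one content entry per image.
            content.extend(mm_content["image"])
        else:
            content.append(mm_content)
    return [{"role": "user", "content": content}]

imgs = [{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}] * 2
msg = to_chat_message("Describe both images.", {"image": imgs})
assert len(msg[0]["content"]) == 3  # one text entry plus two image entries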
@@ -747,6 +754,9 @@
         sampled_requests = []
         dynamic_output = output_len is None

+        # Extract images_per_request from kwargs
+        images_per_request = kwargs.get('images_per_request', 1)
+
         for item in filtered_data:
             if len(sampled_requests) >= num_requests:
                 break
@@ -761,7 +771,22 @@
             assert isinstance(output_len, int) and output_len > 0
             if dynamic_output and not is_valid_sequence(prompt_len, completion_len):
                 continue
-            mm_content = process_image(item["image"]) if "image" in item else None
+
+            # Simple approach: if multiple images requested, duplicate the current image
+            if images_per_request > 1 and "image" in item:
+                single_image = process_image(item["image"])
+                if single_image:
+                    # For chat format, we need to create separate image entries
+                    # Instead of {"image": [img1, img2]}, create multiple separate processed images
+                    images = [single_image] * images_per_request
+                    # For multimodal chat, we'll pass the list to be handled properly
+                    mm_content = {"image": images}
+                else:
+                    mm_content = None
+            else:
+                # Original single image logic
+                mm_content = process_image(item["image"]) if "image" in item else None
+
             if enable_multimodal_chat:
                 # Note: when chat is enabled the request prompt_len is no longer
                 # accurate and we will be using request output to count the

[CI] Ruff (E501): benchmarks/benchmark_dataset.py:780:89 Line too long (99 > 88)
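
Design note: rather than sourcing genuinely distinct images, this branch duplicates one processed image images_per_request times, which inflates per-request multimodal load without requiring a multi-image dataset. A condensed, runnable restatement of the branch under that assumption (fake_process is a hypothetical stand-in for the real process_image):

def build_mm_content(item, images_per_request, process_image):
    # Mirrors the sampling branch above: duplicate one processed image
    # when more than one image per request is requested.
    if images_per_request > 1 and "image" in item:
        single_image = process_image(item["image"])
        if not single_image:
            return None
        return {"image": [single_image] * images_per_request}
    return process_image(item["image"]) if "image" in item else None

# fake_process is a hypothetical stand-in for the real process_image().
fake_process = lambda img: {"type": "image_url", "image_url": {"url": img}}
mm = build_mm_content({"image": "http://example.com/cat.jpg"}, 3, fake_process)
assert len(mm["image"]) == 3  # the same image, repeated three times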
4 changes: 4 additions & 0 deletions benchmarks/benchmark_throughput.py
@@ -465,6 +465,10 @@
"output_len": args.output_len,
}

# Add images_per_request if limit_mm_per_prompt is set
if hasattr(args, 'limit_mm_per_prompt') and args.limit_mm_per_prompt and 'image' in args.limit_mm_per_prompt:

Check failure on line 469 in benchmarks/benchmark_throughput.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

benchmarks/benchmark_throughput.py:469:89: E501 Line too long (113 > 88)
sample_kwargs["images_per_request"] = args.limit_mm_per_prompt['image']

if args.dataset_path is None or args.dataset_name == "random":
sample_kwargs["range_ratio"] = args.random_range_ratio
sample_kwargs["prefix_len"] = args.prefix_len
Expand Down
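
The hasattr-plus-truthiness-plus-membership chain can be exercised in isolation; a sketch, assuming limit_mm_per_prompt is parsed into a dict such as {"image": 4} (getattr is an equivalent, shorter guard):

from argparse import Namespace

# Namespace stands in for the parsed args; {"image": 4} is an assumed shape.
args = Namespace(limit_mm_per_prompt={"image": 4}, output_len=128)
sample_kwargs = {"output_len": args.output_len}
if getattr(args, "limit_mm_per_prompt", None) and "image" in args.limit_mm_per_prompt:
    sample_kwargs["images_per_request"] = args.limit_mm_per_prompt["image"]
assert sample_kwargs["images_per_request"] == 4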
10 changes: 8 additions & 2 deletions vllm/entrypoints/chat_utils.py
@@ -405,8 +405,14 @@
     jinja_text = (hf_chat_template if isinstance(hf_chat_template, str)
                   else load_chat_template(chat_template, is_literal=True))

-    detected_format = ("string" if jinja_text is None else
-                       _detect_content_format(jinja_text, default="string"))
+    # The InternVL template has mixed content access patterns that fail with automatic detection.
+    # Set string format for proper operation if InternVL is used.
+    model_type = getattr(model_config.hf_config, 'model_type', '')
+    if model_type == 'internvl_chat' or 'internvl' in model_config.model.lower():
+        detected_format = "string"
+    else:
+        detected_format = ("string" if jinja_text is None else
+                           _detect_content_format(jinja_text, default="string"))

     return detected_format

[CI] Ruff (E501): vllm/entrypoints/chat_utils.py:408:81 Line too long (97 > 80)
[CI] Ruff (E501): vllm/entrypoints/chat_utils.py:411:81 Line too long (81 > 80)
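
For context, the two formats the detector chooses between differ in the shape of message["content"]; forcing "string" means multimodal parts are flattened into a single text string before the template renders them, sidestepping the InternVL template's mixed attribute/index access. An illustrative sketch of the two shapes (hypothetical messages and placeholder token, not vLLM API calls):

# "string" content format: content is one plain string; the "<image>"
# placeholder shown here is an assumed convention, not a confirmed token.
string_style = {"role": "user", "content": "<image>\nDescribe the image."}

# "openai" content format: content is a list of typed parts.
openai_style = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe the image."},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
    ],
}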