Skip to content

Commit 5aee540

Browse files
committed
ruff
Signed-off-by: Dong Wang <dongw2019@gmail.com>
1 parent 6b70e9b commit 5aee540

File tree

3 files changed

+51
-69
lines changed

3 files changed

+51
-69
lines changed

tests/conftest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,10 @@ def generate(
573573
use_cache=True,
574574
**kwargs,
575575
)
576+
if self.processor is None:
577+
raise RuntimeError(
578+
"HfRunner.processor is not initialized; cannot decode output."
579+
)
576580
output_str = self.processor.batch_decode(
577581
output_ids,
578582
skip_special_tokens=True,

tests/models/multimodal/generation/vlm_utils/model_utils.py

Lines changed: 45 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ def qwen_prompt_path_encoder(
301301
def deepseekvl2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
302302
"""Patches and returns an instance of the HfRunner to use for DeepSeek-VL2."""
303303
hf_processor = hf_model.processor
304+
assert hf_processor is not None
304305

305306
def processor(*args, text="", images=None, **kwargs):
306307
if isinstance(images, Image):
@@ -320,15 +321,16 @@ def processor(*args, text="", images=None, **kwargs):
320321
return BatchFeature(data=inputs, tensor_type="pt")
321322

322323
hf_model.processor = processor
323-
hf_model.model.get_output_embeddings = (
324-
lambda: hf_model.model.language.model.embed_tokens
324+
hf_model.model.get_output_embeddings = lambda: (
325+
hf_model.model.language.model.embed_tokens
325326
)
326327
return hf_model
327328

328329

329330
def gemma3_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
330331
"""Patches and returns an instance of the HfRunner to use for Gemma 3."""
331332
hf_processor = hf_model.processor
333+
assert hf_processor is not None
332334

333335
def processor(*args, **kwargs):
334336
return hf_processor(*args, do_pan_and_scan=True, **kwargs)
@@ -408,6 +410,7 @@ def patched_forward(*args, **kwargs):
408410
hf_model.model.forward = patched_forward
409411

410412
hf_processor = hf_model.processor
413+
assert hf_processor is not None
411414

412415
def processor(*args, text="", images=None, **kwargs):
413416
if images is None:
@@ -433,15 +436,16 @@ def processor(*args, text="", images=None, **kwargs):
433436
)
434437

435438
hf_model.processor = processor
436-
hf_model.model.get_output_embeddings = (
437-
lambda: hf_model.model.transformer.output_layer
439+
hf_model.model.get_output_embeddings = lambda: (
440+
hf_model.model.transformer.output_layer
438441
)
439442
return hf_model
440443

441444

442445
def glm4_1v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
443446
"""Patches and returns an instance of the HfRunner to use for GLM4.1V."""
444447
hf_processor = hf_model.processor
448+
assert hf_processor is not None
445449

446450
def processor(*args, videos=None, **kwargs):
447451
if videos is not None and is_list_of(videos, tuple):
@@ -521,8 +525,8 @@ def __call__(self, text: str, images: Image | list[Image], **kwargs):
521525
img_context_token_id = hf_model.tokenizer.convert_tokens_to_ids("<IMG_CONTEXT>")
522526
hf_model.model.img_context_token_id = img_context_token_id
523527
hf_model.processor = H2OVLProcessor(hf_model)
524-
hf_model.model.get_output_embeddings = (
525-
lambda: hf_model.model.language_model.get_output_embeddings()
528+
hf_model.model.get_output_embeddings = lambda: (
529+
hf_model.model.language_model.get_output_embeddings()
526530
)
527531
hf_model.model.generate = types.MethodType(_internvl_generate, hf_model.model)
528532
return hf_model
@@ -555,6 +559,7 @@ def compute_position_ids_input_ids(input_ids: torch.Tensor) -> torch.Tensor:
555559
# 1) Patch processor: move BatchFeature input_ids and TensorStream to model device
556560
# ----------------------------
557561
original_processor = hf_model.processor
562+
assert original_processor is not None
558563

559564
def patched_processor(*args, **kwargs):
560565
result = original_processor(*args, **kwargs)
@@ -782,8 +787,8 @@ def __call__(self, text: str, images: Image | list[Image], **kwargs):
782787
img_context_token_id = hf_model.tokenizer.convert_tokens_to_ids("<IMG_CONTEXT>")
783788
hf_model.model.img_context_token_id = img_context_token_id
784789
hf_model.processor = SkyworkR1VProcessor(hf_model)
785-
hf_model.model.get_output_embeddings = (
786-
lambda: hf_model.model.language_model.get_output_embeddings()
790+
hf_model.model.get_output_embeddings = lambda: (
791+
hf_model.model.language_model.get_output_embeddings()
787792
)
788793
hf_model.model.generate = types.MethodType(_internvl_generate, hf_model.model)
789794
return hf_model
@@ -890,8 +895,8 @@ def __call__(
890895
img_context_token_id = hf_model.tokenizer.convert_tokens_to_ids("<IMG_CONTEXT>")
891896
hf_model.model.img_context_token_id = img_context_token_id
892897
hf_model.processor = InternVLProcessor(hf_model)
893-
hf_model.model.get_output_embeddings = (
894-
lambda: hf_model.model.language_model.get_output_embeddings()
898+
hf_model.model.get_output_embeddings = lambda: (
899+
hf_model.model.language_model.get_output_embeddings()
895900
)
896901
hf_model.model.generate = types.MethodType(_internvl_generate, hf_model.model)
897902
return hf_model
@@ -1029,6 +1034,7 @@ def _generate(self, *args, image_sizes=None, **kwargs):
10291034
def molmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
10301035
"""Patches and returns an instance of the HfRunner to use for Molmo."""
10311036
hf_processor = hf_model.processor
1037+
assert hf_processor is not None
10321038

10331039
def _processor(*args, **kwargs):
10341040
return hf_processor.process(*args, **kwargs)
@@ -1060,8 +1066,8 @@ def _generate(self, max_new_tokens=None, do_sample=None, **kwargs):
10601066

10611067
def ovis_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
10621068
"""Patches and returns an instance of the HfRunner to use for Ovis2."""
1063-
hf_model.model.get_output_embeddings = (
1064-
lambda: hf_model.model.llm.get_output_embeddings()
1069+
hf_model.model.get_output_embeddings = lambda: (
1070+
hf_model.model.llm.get_output_embeddings()
10651071
)
10661072

10671073
def processor(*args, text="", images=None, **kwargs):
@@ -1096,8 +1102,8 @@ def processor(*args, text="", images=None, **kwargs):
10961102

10971103
def ovis2_5_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
10981104
"""Patches and returns an instance of the HfRunner to use for Ovis2.5."""
1099-
hf_model.model.get_output_embeddings = (
1100-
lambda: hf_model.model.llm.get_output_embeddings()
1105+
hf_model.model.get_output_embeddings = lambda: (
1106+
hf_model.model.llm.get_output_embeddings()
11011107
)
11021108

11031109
def processor(*args, text="", images=None, videos=None, **kwargs):
@@ -1160,6 +1166,7 @@ def qwen2_5_omni_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
11601166
def qwen3_vl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
11611167
"""Patches and returns an instance of the HfRunner to use for Qwen3-VL."""
11621168
hf_processor = hf_model.processor
1169+
assert hf_processor is not None
11631170

11641171
def processor(*args, videos=None, **kwargs):
11651172
if videos is not None and is_list_of(videos, tuple):
@@ -1211,6 +1218,7 @@ def tarsier_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
12111218
vision_encoder_info = get_vision_encoder_info(hf_model.config)
12121219

12131220
hf_processor = hf_model.processor
1221+
assert hf_processor is not None
12141222
if hf_processor.patch_size is None:
12151223
hf_processor.patch_size = vision_encoder_info.get_patch_size()
12161224

@@ -1287,18 +1295,17 @@ def _encode_vision(pixel_values, tilings):
12871295
pv = pv.to(device=device, dtype=dtype)
12881296

12891297
features = native_model._vis_enc(pv)
1290-
grid_size = (
1291-
config.vision.crop_size // config.vision.enc_patch_size
1292-
)
1298+
grid_size = config.vision.crop_size // config.vision.enc_patch_size
12931299
global_feat = features[0]
12941300

12951301
if features.shape[0] > 1 and tilings is not None:
12961302
tiling = _normalize_tiling(tilings)
1297-
local = features[1:].view(
1298-
-1, grid_size, grid_size, config.vision.enc_dim
1299-
)
1303+
local = features[1:].view(-1, grid_size, grid_size, config.vision.enc_dim)
13001304
reconstructed = reconstruct_from_crops(
1301-
local, tiling, config.vision.overlap_margin, patch_size=1,
1305+
local,
1306+
tiling,
1307+
config.vision.overlap_margin,
1308+
patch_size=1,
13021309
)
13031310
else:
13041311
reconstructed = global_feat.view(
@@ -1362,20 +1369,14 @@ def _generate(
13621369

13631370
# --- Prefill BOS + vision embeddings ---
13641371
bos_emb = F.embedding(
1365-
torch.tensor(
1366-
[[config.tokenizer.bos_id]], device=device
1367-
),
1372+
torch.tensor([[config.tokenizer.bos_id]], device=device),
13681373
native_model.text.wte,
13691374
)
1370-
img_input = torch.cat(
1371-
[bos_emb, img_emb.unsqueeze(0)], dim=1
1372-
)
1375+
img_input = torch.cat([bos_emb, img_emb.unsqueeze(0)], dim=1)
13731376
prefix_len = img_input.size(1) # 730
13741377

13751378
mask = native_model.attn_mask[:, :, :prefix_len, :]
1376-
pos_ids = torch.arange(
1377-
prefix_len, dtype=torch.long, device=device
1378-
)
1379+
pos_ids = torch.arange(prefix_len, dtype=torch.long, device=device)
13791380
native_model._prefill(img_input, mask, pos_ids, None)
13801381

13811382
# --- Extract prompt tokens after BOS + <image> ---
@@ -1391,7 +1392,7 @@ def _generate(
13911392
)
13921393
return sequences
13931394

1394-
prompt_tokens = ids[img_start + len(image_placeholder_ids):]
1395+
prompt_tokens = ids[img_start + len(image_placeholder_ids) :]
13951396

13961397
# --- Prefill prompt tokens and get first logits ---
13971398
if not prompt_tokens:
@@ -1403,35 +1404,23 @@ def _generate(
14031404
)
14041405
return sequences
14051406

1406-
prompt_tensor = torch.tensor(
1407-
[prompt_tokens], device=device
1408-
)
1409-
prompt_emb = F.embedding(
1410-
prompt_tensor, native_model.text.wte
1411-
)
1407+
prompt_tensor = torch.tensor([prompt_tokens], device=device)
1408+
prompt_emb = F.embedding(prompt_tensor, native_model.text.wte)
14121409
prompt_len = prompt_emb.size(1)
14131410

1414-
mask = native_model.attn_mask[
1415-
:, :, prefix_len : prefix_len + prompt_len, :
1416-
]
1411+
mask = native_model.attn_mask[:, :, prefix_len : prefix_len + prompt_len, :]
14171412
pos_ids = torch.arange(
14181413
prefix_len,
14191414
prefix_len + prompt_len,
14201415
dtype=torch.long,
14211416
device=device,
14221417
)
1423-
hidden = native_model._prefill(
1424-
prompt_emb, mask, pos_ids, None
1425-
)
1418+
hidden = native_model._prefill(prompt_emb, mask, pos_ids, None)
14261419
pos = prefix_len + prompt_len
14271420

14281421
# Compute logits from last hidden state
1429-
hidden_last = native_model.text.post_ln(
1430-
hidden[:, -1:, :]
1431-
)
1432-
logits = native_model.text.lm_head(
1433-
hidden_last.squeeze(1)
1434-
)
1422+
hidden_last = native_model.text.post_ln(hidden[:, -1:, :])
1423+
logits = native_model.text.lm_head(hidden_last.squeeze(1))
14351424

14361425
# --- Greedy decode ---
14371426
generated = []
@@ -1456,22 +1445,12 @@ def _generate(
14561445
torch.tensor([[next_token]], device=device),
14571446
native_model.text.wte,
14581447
)
1459-
mask = native_model.attn_mask[
1460-
:, :, pos : pos + 1, :
1461-
]
1462-
pos_ids_step = torch.tensor(
1463-
[pos], dtype=torch.long, device=device
1464-
)
1465-
hidden = native_model._prefill(
1466-
next_emb, mask, pos_ids_step, None
1467-
)
1468-
hidden_last = native_model.text.post_ln(
1469-
hidden[:, -1:, :]
1470-
)
1448+
mask = native_model.attn_mask[:, :, pos : pos + 1, :]
1449+
pos_ids_step = torch.tensor([pos], dtype=torch.long, device=device)
1450+
hidden = native_model._prefill(next_emb, mask, pos_ids_step, None)
1451+
hidden_last = native_model.text.post_ln(hidden[:, -1:, :])
14711452
prev_hs = hidden_last
1472-
logits = native_model.text.lm_head(
1473-
hidden_last.squeeze(1)
1474-
)
1453+
logits = native_model.text.lm_head(hidden_last.squeeze(1))
14751454
pos += 1
14761455

14771456
result_ids = ids + generated
@@ -1480,8 +1459,7 @@ def _generate(
14801459
if return_dict:
14811460
return types.SimpleNamespace(
14821461
sequences=sequences,
1483-
hidden_states=tuple(all_hidden_states)
1484-
if output_hs else None,
1462+
hidden_states=tuple(all_hidden_states) if output_hs else None,
14851463
)
14861464
return sequences
14871465

@@ -1514,6 +1492,7 @@ def voxtral_patch_hf_runner(hf_model: "HfRunner") -> "HfRunner":
15141492
import soundfile as sf
15151493

15161494
processor = hf_model.processor
1495+
assert processor is not None
15171496

15181497
def _audio_to_base64(audio_array, sample_rate: int) -> str:
15191498
"""Encode a numpy audio array as a base64 WAV string."""

vllm/v1/engine/output_processor.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -661,12 +661,11 @@ def process_outputs(
661661
if (
662662
isinstance(text_override, str)
663663
and finish_reason is not None
664-
and hasattr(request_output, "outputs")
664+
and isinstance(request_output, RequestOutput)
665665
and request_output.outputs
666666
):
667667
for comp_output in request_output.outputs:
668-
if isinstance(comp_output, CompletionOutput):
669-
comp_output.text = text_override
668+
comp_output.text = text_override
670669

671670
if req_state.streaming_input:
672671
request_output.finished = False

0 commit comments

Comments
 (0)