
Commit b40db4d

[StepVL] add step vl offline example (vllm-project#33054)

Authored by ltd0924 and luotingdan

Signed-off-by: luotingdan <[email protected]>
Co-authored-by: luotingdan <[email protected]>

1 parent 11b5568

File tree: 2 files changed, +54 −0 lines


examples/offline_inference/vision_language.py (+27, −0)
@@ -1889,6 +1889,32 @@ def run_step3(questions: list[str], modality: str) -> ModelRequestData:
     )
 
 
+# StepVL10B
+def run_step_vl(questions: list[str], modality: str) -> ModelRequestData:
+    assert modality == "image"
+
+    model_name = "stepfun-ai/Step3-VL-10B"
+    engine_args = EngineArgs(
+        model=model_name,
+        max_num_batched_tokens=4096,
+        tensor_parallel_size=1,
+        trust_remote_code=True,
+        limit_mm_per_prompt={modality: 1},
+        reasoning_parser="deepseek_r1",
+    )
+
+    prompts = [
+        "<|begin▁of▁sentence|> You are a helpful assistant.<|BOT|>user\n "
+        f"<im_patch>{question} <|EOT|><|BOT|>assistant\n<think>\n"
+        for question in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 # omni-research/Tarsier-7b
 def run_tarsier(questions: list[str], modality: str) -> ModelRequestData:
     assert modality == "image"

@@ -2006,6 +2032,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
     "skywork_chat": run_skyworkr1v,
     "smolvlm": run_smolvlm,
     "step3": run_step3,
+    "stepvl": run_step_vl,
     "tarsier": run_tarsier,
     "tarsier2": run_tarsier2,
 }
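For context, the example script dispatches on the new "stepvl" key and feeds the returned ModelRequestData into vLLM's offline API, so the new entry can be exercised with something like `python examples/offline_inference/vision_language.py --model-type stepvl` (flag name per the script's existing argparse setup). Below is a minimal standalone sketch of that consumption path; the driver code itself is not part of this diff, and it assumes vLLM's public LLM / SamplingParams API plus the bundled ImageAsset helper:

    # Sketch, not part of this commit: how a driver typically consumes the
    # ModelRequestData returned by run_step_vl.
    from dataclasses import asdict

    from vllm import LLM, SamplingParams
    from vllm.assets.image import ImageAsset

    req_data = run_step_vl(["What is in this image?"], modality="image")

    # Build the engine from the EngineArgs constructed by the example.
    llm = LLM(**asdict(req_data.engine_args))

    # Attach actual image data; the <im_patch> placeholder in each prompt
    # marks where the vision tokens are substituted.
    image = ImageAsset("cherry_blossom").pil_image
    outputs = llm.generate(
        [
            {"prompt": p, "multi_modal_data": {"image": image}}
            for p in req_data.prompts
        ],
        SamplingParams(temperature=0.2, max_tokens=128),
    )
    for out in outputs:
        print(out.outputs[0].text)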

examples/offline_inference/vision_language_multi_image.py (+27, −0)
@@ -1182,6 +1182,32 @@ def load_step3(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
+def load_step_vl(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "stepfun-ai/Step3-VL-10B"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_num_batched_tokens=4096,
+        limit_mm_per_prompt={"image": len(image_urls)},
+        hf_overrides={"vision_config": {"enable_patch": False}},
+        trust_remote_code=True,
+        reasoning_parser="deepseek_r1",
+    )
+
+    prompt = (
+        "<|begin▁of▁sentence|> You are a helpful assistant.<|BOT|>user\n "
+        f"{'<im_patch>' * len(image_urls)}{question}<|EOT|><|BOT|>"
+        "assistant\n<think>\n"
+    )
+    image_data = [fetch_image(url) for url in image_urls]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=image_data,
+    )
+
+
 def load_tarsier(question: str, image_urls: list[str]) -> ModelRequestData:
     model_name = "omni-research/Tarsier-7b"
 

@@ -1374,6 +1400,7 @@ def load_molmo2(question: str, image_urls: list[str]) -> ModelRequestData:
     "rvl": load_r_vl,
     "smolvlm": load_smolvlm,
     "step3": load_step3,
+    "stepvl": load_step_vl,
     "tarsier": load_tarsier,
     "tarsier2": load_tarsier2,
     "glm4_5v": load_glm4_5v,
