Skip to content

Commit dfe43a2

Browse files
mrsalehi, sangho-vision, ywang96
authored
[Model] Molmo vLLM Integration (#9016)
Co-authored-by: sanghol <[email protected]>
Co-authored-by: Roger Wang <[email protected]>
Co-authored-by: Roger Wang <[email protected]>
1 parent 16b24e7 commit dfe43a2

File tree

7 files changed

+1319
-3
lines changed

7 files changed

+1319
-3
lines changed

docs/source/models/supported_models.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,12 @@ Text Generation
399399
- :code:`meta-llama/Llama-3.2-90B-Vision-Instruct`, :code:`meta-llama/Llama-3.2-11B-Vision`, etc.
400400
-
401401
-
402+
* - :code:`MolmoForCausalLM`
403+
- Molmo
404+
- Image
405+
- :code:`allenai/Molmo-7B-D-0924`, :code:`allenai/Molmo-72B-0924`, etc.
406+
-
407+
- ✅︎
402408
* - :code:`NVLM_D_Model`
403409
- NVLM-D 1.0
404410
- Image\ :sup:`E+`

examples/offline_inference_vision_language.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,23 @@ def run_mllama(question: str, modality: str):
300300
return llm, prompt, stop_token_ids
301301

302302

303+
# Molmo
304+
def run_molmo(question, modality):
305+
assert modality == "image"
306+
307+
model_name = "allenai/Molmo-7B-D-0924"
308+
309+
llm = LLM(
310+
model=model_name,
311+
trust_remote_code=True,
312+
dtype="bfloat16",
313+
)
314+
315+
prompt = question
316+
stop_token_ids = None
317+
return llm, prompt, stop_token_ids
318+
319+
303320
# GLM-4v
304321
def run_glm4v(question: str, modality: str):
305322
assert modality == "image"
@@ -331,6 +348,7 @@ def run_glm4v(question: str, modality: str):
331348
"qwen_vl": run_qwen_vl,
332349
"qwen2_vl": run_qwen2_vl,
333350
"mllama": run_mllama,
351+
"molmo": run_molmo,
334352
"glm4v": run_glm4v,
335353
}
336354

vllm/entrypoints/chat_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ def _placeholder_str(self, modality: ModalityStr,
163163
return "<|image|>"
164164
if model_type == "qwen2_vl":
165165
return "<|vision_start|><|image_pad|><|vision_end|>"
166+
if model_type == "molmo":
167+
return ""
166168

167169
raise TypeError(f"Unknown model type: {model_type}")
168170
elif modality == "audio":

vllm/model_executor/models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@
2020
"supports_multimodal",
2121
"SupportsPP",
2222
"supports_pp",
23-
]
23+
]

0 commit comments

Comments
 (0)