vllm-project
diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst
Lines changed: 6 additions & 0 deletions
diff --git a/examples/offline_inference_vision_language.py b/examples/offline_inference_vision_language.py
Lines changed: 18 additions & 0 deletions
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
Lines changed: 2 additions & 0 deletions
diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py
Lines changed: 1 addition & 1 deletion
@@ -399,6 +399,12 @@ Text Generation
     - :code:`meta-llama/Llama-3.2-90B-Vision-Instruct`, :code:`meta-llama/Llama-3.2-11B-Vision`, etc.
     -
     -
+  * - :code:`MolmoForCausalLM`
+    - Molmo
+    - Image
+    - :code:`allenai/Molmo-7B-D-0924`, :code:`allenai/Molmo-72B-0924`, etc.
+    -
+    - ✅︎
   * - :code:`NVLM_D_Model`
     - NVLM-D 1.0
     - Image\ :sup:`E+`
 
@@ -300,6 +300,23 @@ def run_mllama(question: str, modality: str):
     return llm, prompt, stop_token_ids
 
 
+# Molmo
+def run_molmo(question: str, modality: str):
+    """Return the (llm, prompt, stop_token_ids) triple for the Molmo example."""
+    assert modality == "image"
+
+    model_name = "allenai/Molmo-7B-D-0924"
+    # The engine is built in bfloat16 with remote code trusted.
+    llm = LLM(
+        model=model_name,
+        trust_remote_code=True,
+        dtype="bfloat16",
+    )
+    # The question is used verbatim as the prompt; no stop tokens are set.
+    prompt = question
+    stop_token_ids = None
+    return llm, prompt, stop_token_ids
+
+
 # GLM-4v
 def run_glm4v(question: str, modality: str):
     assert modality == "image"
@@ -331,6 +348,7 @@ def run_glm4v(question: str, modality: str):
     "qwen_vl": run_qwen_vl,
     "qwen2_vl": run_qwen2_vl,
     "mllama": run_mllama,
+    "molmo": run_molmo,
     "glm4v": run_glm4v,
 }
 
 
@@ -163,6 +163,8 @@ def _placeholder_str(self, modality: ModalityStr,
                 return "<|image|>"
             if model_type == "qwen2_vl":
                 return "<|vision_start|><|image_pad|><|vision_end|>"
+            if model_type == "molmo":
+                return ""
 
             raise TypeError(f"Unknown model type: {model_type}")
         elif modality == "audio":
 
@@ -20,4 +20,4 @@
     "supports_multimodal",
     "SupportsPP",
     "supports_pp",
-]
+]
Original file line number	Diff line number	Diff line change
`@@ -399,6 +399,12 @@ Text Generation`
`399`	`399`	- :code:`meta-llama/Llama-3.2-90B-Vision-Instruct`, :code:`meta-llama/Llama-3.2-11B-Vision`, etc.
`400`	`400`	`-`
`401`	`401`	`-`
	`402`	+ * - :code:`MolmoForCausalLM`
	`403`	`+ - Molmo`
	`404`	`+ - Image`
	`405`	+ - :code:`allenai/Molmo-7B-D-0924`, :code:`allenai/Molmo-72B-0924`, etc.
	`406`	`+ -`
	`407`	`+ - ✅︎`
`402`	`408`	* - :code:`NVLM_D_Model`
`403`	`409`	`- NVLM-D 1.0`
`404`	`410`	- Image\ :sup:`E+`