Skip to content

Commit 9f909b8

Browse files
authored
[New Model] Support Command-A-Vision (#22660)
Signed-off-by: donglu <[email protected]>
1 parent 59f3b93 commit 9f909b8

File tree

6 files changed

+510
-1
lines changed

6 files changed

+510
-1
lines changed

docs/models/supported_models.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ th {
331331
| `BloomForCausalLM` | BLOOM, BLOOMZ, BLOOMChat | `bigscience/bloom`, `bigscience/bloomz`, etc. | | ✅︎ | |
332332
| `BartForConditionalGeneration` | BART | `facebook/bart-base`, `facebook/bart-large-cnn`, etc. | | | |
333333
| `ChatGLMModel`, `ChatGLMForConditionalGeneration` | ChatGLM | `zai-org/chatglm2-6b`, `zai-org/chatglm3-6b`, `ShieldLM-6B-chatglm3`, etc. | ✅︎ | ✅︎ | ✅︎ |
334-
| `CohereForCausalLM`, `Cohere2ForCausalLM` | Command-R | `CohereForAI/c4ai-command-r-v01`, `CohereForAI/c4ai-command-r7b-12-2024`, etc. | ✅︎ | ✅︎ | ✅︎ |
334+
| `CohereForCausalLM`, `Cohere2ForCausalLM` | Command-R | `CohereLabs/c4ai-command-r-v01`, `CohereLabs/c4ai-command-r7b-12-2024`, etc. | ✅︎ | ✅︎ | ✅︎ |
335335
| `DbrxForCausalLM` | DBRX | `databricks/dbrx-base`, `databricks/dbrx-instruct`, etc. | | ✅︎ | ✅︎ |
336336
| `DeciLMForCausalLM` | DeciLM | `nvidia/Llama-3_3-Nemotron-Super-49B-v1`, etc. | ✅︎ | ✅︎ | ✅︎ |
337337
| `DeepseekForCausalLM` | DeepSeek | `deepseek-ai/deepseek-llm-67b-base`, `deepseek-ai/deepseek-llm-7b-chat`, etc. | | ✅︎ | ✅︎ |
@@ -601,6 +601,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
601601
| `AyaVisionForConditionalGeneration` | Aya Vision | T + I<sup>+</sup> | `CohereForAI/aya-vision-8b`, `CohereForAI/aya-vision-32b`, etc. | | ✅︎ | ✅︎ |
602602
| `Blip2ForConditionalGeneration` | BLIP-2 | T + I<sup>E</sup> | `Salesforce/blip2-opt-2.7b`, `Salesforce/blip2-opt-6.7b`, etc. | | ✅︎ | ✅︎ |
603603
| `ChameleonForConditionalGeneration` | Chameleon | T + I | `facebook/chameleon-7b`, etc. | | ✅︎ | ✅︎ |
604+
| `Cohere2VisionForConditionalGeneration` | Command A Vision | T + I<sup>+</sup> | `CohereLabs/command-a-vision-07-2025`, etc. | | ✅︎ | ✅︎ |
604605
| `DeepseekVLV2ForCausalLM`<sup>^</sup> | DeepSeek-VL2 | T + I<sup>+</sup> | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2`, etc. | | ✅︎ | ✅︎ |
605606
| `Florence2ForConditionalGeneration` | Florence-2 | T + I | `microsoft/Florence-2-base`, `microsoft/Florence-2-large`, etc. | | | |
606607
| `FuyuForCausalLM` | Fuyu | T + I | `adept/fuyu-8b`, etc. | | ✅︎ | ✅︎ |

examples/offline_inference/vision_language.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,29 @@ def run_chameleon(questions: list[str], modality: str) -> ModelRequestData:
126126
)
127127

128128

129+
def run_command_a_vision(questions: list[str], modality: str) -> ModelRequestData:
    """Build engine args and raw chat-format prompts for Command A Vision.

    Only the image modality is supported; each prompt carries exactly one
    image placeholder token.
    """
    assert modality == "image"

    model_name = "CohereLabs/command-a-vision-07-2025"

    # Wrap every question in the model's raw chat-turn markup, with the
    # image-patch placeholder preceding the user text.
    rendered_prompts = [
        "<|START_OF_TURN_TOKEN|><|USER_TOKEN|><|IMG_PATCH|>"
        + question
        + "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
        for question in questions
    ]

    return ModelRequestData(
        engine_args=EngineArgs(
            model=model_name,
            max_model_len=32768,
            tensor_parallel_size=4,
            limit_mm_per_prompt={modality: 1},
        ),
        prompts=rendered_prompts,
    )
150+
151+
129152
# Deepseek-VL2
130153
def run_deepseek_vl2(questions: list[str], modality: str) -> ModelRequestData:
131154
assert modality == "image"
@@ -1417,6 +1440,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
14171440
"aya_vision": run_aya_vision,
14181441
"blip-2": run_blip2,
14191442
"chameleon": run_chameleon,
1443+
"command_a_vision": run_command_a_vision,
14201444
"deepseek_vl_v2": run_deepseek_vl2,
14211445
"florence2": run_florence2,
14221446
"fuyu": run_fuyu,

examples/offline_inference/vision_language_multi_image.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,42 @@ def load_aya_vision(question: str, image_urls: list[str]) -> ModelRequestData:
107107
)
108108

109109

110+
def load_command_a_vision(question: str, image_urls: list[str]) -> ModelRequestData:
    """Build a multi-image request for Command A Vision.

    The prompt is rendered through the model's HuggingFace chat template;
    images are fetched eagerly and passed alongside the prompt.
    """
    model_name = "CohereLabs/command-a-vision-07-2025"

    # NOTE: This model is 122B parameters and requires tensor parallelism
    # Recommended to use tp=4 on H100 GPUs
    engine_args = EngineArgs(
        model=model_name,
        max_model_len=32768,
        tensor_parallel_size=4,
        limit_mm_per_prompt={"image": len(image_urls)},
    )

    # One user turn: all image placeholders first, then the question text.
    user_content = [{"type": "image", "image": url} for url in image_urls]
    user_content.append({"type": "text", "text": question})
    messages = [{"role": "user", "content": user_content}]

    prompt = AutoProcessor.from_pretrained(model_name).apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    images = [fetch_image(url) for url in image_urls]
    return ModelRequestData(
        engine_args=engine_args,
        prompt=prompt,
        image_data=images,
    )
144+
145+
110146
def load_deepseek_vl2(question: str, image_urls: list[str]) -> ModelRequestData:
111147
model_name = "deepseek-ai/deepseek-vl2-tiny"
112148

@@ -1031,6 +1067,7 @@ def load_tarsier2(question: str, image_urls: list[str]) -> ModelRequestData:
10311067
model_example_map = {
10321068
"aria": load_aria,
10331069
"aya_vision": load_aya_vision,
1070+
"command_a_vision": load_command_a_vision,
10341071
"deepseek_vl_v2": load_deepseek_vl2,
10351072
"gemma3": load_gemma3,
10361073
"h2ovl_chat": load_h2ovl,

tests/models/registry.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ def check_available_online(
383383
"Blip2ForConditionalGeneration": _HfExamplesInfo("Salesforce/blip2-opt-2.7b", # noqa: E501
384384
extras={"6b": "Salesforce/blip2-opt-6.7b"}), # noqa: E501
385385
"ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"), # noqa: E501
386+
"Cohere2VisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/command-a-vision-07-2025"), # noqa: E501
386387
"DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny", # noqa: E501
387388
extras={"fork": "Isotr0py/deepseek-vl2-tiny"}, # noqa: E501
388389
max_transformers_version="4.48", # noqa: E501

0 commit comments

Comments
 (0)