120 | 120 |     "JambaForCausalLM": ("jamba", "JambaForCausalLM"),
121 | 121 |     "Lfm2ForCausalLM": ("lfm2", "Lfm2ForCausalLM"),
122 | 122 |     "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
123 |     | -     "Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"),  # noqa: E501
    | 123 | +     "Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"),
124 | 124 |     # For decapoda-research/llama-*
125 | 125 |     "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"),
126 | 126 |     "LongcatFlashForCausalLM": ("longcat_flash", "LongcatFlashForCausalLM"),

204 | 204 |     "LlavaNextForConditionalGeneration": (
205 | 205 |         "llava_next",
206 | 206 |         "LlavaNextForConditionalGeneration",
207 |     | -     ),  # noqa: E501
    | 207 | +     ),
208 | 208 |     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
209 | 209 |     "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
210 | 210 |     # Technically Terratorch models work on images, both in

240 | 240 |     "AyaVisionForConditionalGeneration": (
241 | 241 |         "aya_vision",
242 | 242 |         "AyaVisionForConditionalGeneration",
243 |     | -     ),  # noqa: E501
    | 243 | +     ),
244 | 244 |     "Blip2ForConditionalGeneration": ("blip2", "Blip2ForConditionalGeneration"),
245 | 245 |     "ChameleonForConditionalGeneration": (
246 | 246 |         "chameleon",
247 | 247 |         "ChameleonForConditionalGeneration",
248 |     | -     ),  # noqa: E501
    | 248 | +     ),
249 | 249 |     "Cohere2VisionForConditionalGeneration": (
250 | 250 |         "cohere2_vision",
251 | 251 |         "Cohere2VisionForConditionalGeneration",
252 |     | -     ),  # noqa: E501
    | 252 | +     ),
253 | 253 |     "DeepseekVLV2ForCausalLM": ("deepseek_vl2", "DeepseekVLV2ForCausalLM"),
254 | 254 |     "DotsOCRForCausalLM": ("dots_ocr", "DotsOCRForCausalLM"),
255 | 255 |     "Ernie4_5_VLMoeForConditionalGeneration": (
256 | 256 |         "ernie45_vl",
257 | 257 |         "Ernie4_5_VLMoeForConditionalGeneration",
258 |     | -     ),  # noqa: E501
    | 258 | +     ),
259 | 259 |     "FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"),
260 | 260 |     "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"),  # noqa: E501
261 | 261 |     "Gemma3nForConditionalGeneration": (
262 | 262 |         "gemma3n_mm",
263 | 263 |         "Gemma3nForConditionalGeneration",
264 |     | -     ),  # noqa: E501
    | 264 | +     ),
265 | 265 |     "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
266 | 266 |     "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
267 | 267 |     "Glm4vMoeForConditionalGeneration": ("glm4_1v", "Glm4vMoeForConditionalGeneration"),  # noqa: E501
268 | 268 |     "GraniteSpeechForConditionalGeneration": (
269 | 269 |         "granite_speech",
270 | 270 |         "GraniteSpeechForConditionalGeneration",
271 |     | -     ),  # noqa: E501
    | 271 | +     ),
272 | 272 |     "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
273 | 273 |     "InternVLChatModel": ("internvl", "InternVLChatModel"),
274 | 274 |     "NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
275 | 275 |     "InternS1ForConditionalGeneration": (
276 | 276 |         "interns1",
277 | 277 |         "InternS1ForConditionalGeneration",
278 |     | -     ),  # noqa: E501
    | 278 | +     ),
279 | 279 |     "InternVLForConditionalGeneration": (
280 | 280 |         "interns1",
281 | 281 |         "InternS1ForConditionalGeneration",
282 |     | -     ),  # noqa: E501
    | 282 | +     ),
283 | 283 |     "Idefics3ForConditionalGeneration": (
284 | 284 |         "idefics3",
285 | 285 |         "Idefics3ForConditionalGeneration",

289 | 289 |     "KeyeVL1_5ForConditionalGeneration": (
290 | 290 |         "keye_vl1_5",
291 | 291 |         "KeyeVL1_5ForConditionalGeneration",
292 |     | -     ),  # noqa: E501
    | 292 | +     ),
293 | 293 |     "RForConditionalGeneration": ("rvl", "RForConditionalGeneration"),
294 | 294 |     "KimiVLForConditionalGeneration": ("kimi_vl", "KimiVLForConditionalGeneration"),  # noqa: E501
295 | 295 |     "Llama_Nemotron_Nano_VL": ("nemotron_vl", "LlamaNemotronVLChatModel"),

298 | 298 |     "LlavaNextForConditionalGeneration": (
299 | 299 |         "llava_next",
300 | 300 |         "LlavaNextForConditionalGeneration",
301 |     | -     ),  # noqa: E501
    | 301 | +     ),
302 | 302 |     "LlavaNextVideoForConditionalGeneration": (
303 | 303 |         "llava_next_video",
304 | 304 |         "LlavaNextVideoForConditionalGeneration",
305 |     | -     ),  # noqa: E501
    | 305 | +     ),
306 | 306 |     "LlavaOnevisionForConditionalGeneration": (
307 | 307 |         "llava_onevision",
308 | 308 |         "LlavaOnevisionForConditionalGeneration",
309 |     | -     ),  # noqa: E501
    | 309 | +     ),
310 | 310 |     "MantisForConditionalGeneration": ("llava", "MantisForConditionalGeneration"),  # noqa: E501
311 | 311 |     "MiDashengLMModel": ("midashenglm", "MiDashengLMModel"),
312 | 312 |     "MiniMaxVL01ForConditionalGeneration": (
313 | 313 |         "minimax_vl_01",
314 | 314 |         "MiniMaxVL01ForConditionalGeneration",
315 |     | -     ),  # noqa: E501
    | 315 | +     ),
316 | 316 |     "MiniCPMO": ("minicpmo", "MiniCPMO"),
317 | 317 |     "MiniCPMV": ("minicpmv", "MiniCPMV"),
318 | 318 |     "Mistral3ForConditionalGeneration": (
319 | 319 |         "mistral3",
320 | 320 |         "Mistral3ForConditionalGeneration",
321 |     | -     ),  # noqa: E501
    | 321 | +     ),
322 | 322 |     "MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
323 | 323 |     "NVLM_D": ("nvlm_d", "NVLM_D_Model"),
324 | 324 |     "Ovis": ("ovis", "Ovis"),
325 | 325 |     "Ovis2_5": ("ovis2_5", "Ovis2_5"),
326 | 326 |     "PaliGemmaForConditionalGeneration": (
327 | 327 |         "paligemma",
328 | 328 |         "PaliGemmaForConditionalGeneration",
329 |     | -     ),  # noqa: E501
    | 329 | +     ),
330 | 330 |     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
331 | 331 |     "Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
332 | 332 |     "Phi4MultimodalForCausalLM": ("phi4_multimodal", "Phi4MultimodalForCausalLM"),  # noqa: E501

336 | 336 |     "Qwen2_5_VLForConditionalGeneration": (
337 | 337 |         "qwen2_5_vl",
338 | 338 |         "Qwen2_5_VLForConditionalGeneration",
339 |     | -     ),  # noqa: E501
    | 339 | +     ),
340 | 340 |     "Qwen2AudioForConditionalGeneration": (
341 | 341 |         "qwen2_audio",
342 | 342 |         "Qwen2AudioForConditionalGeneration",
343 |     | -     ),  # noqa: E501
    | 343 | +     ),
344 | 344 |     "Qwen2_5OmniModel": (
345 | 345 |         "qwen2_5_omni_thinker",
346 | 346 |         "Qwen2_5OmniThinkerForConditionalGeneration",
347 |     | -     ),  # noqa: E501
    | 347 | +     ),
348 | 348 |     "Qwen2_5OmniForConditionalGeneration": (
349 | 349 |         "qwen2_5_omni_thinker",
350 | 350 |         "Qwen2_5OmniThinkerForConditionalGeneration",
351 |     | -     ),  # noqa: E501
    | 351 | +     ),
352 | 352 |     "Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"),  # noqa: E501
353 | 353 |     "Qwen3VLMoeForConditionalGeneration": (
354 | 354 |         "qwen3_vl_moe",
355 | 355 |         "Qwen3VLMoeForConditionalGeneration",
356 |     | -     ),  # noqa: E501
    | 356 | +     ),
357 | 357 |     "SkyworkR1VChatModel": ("skyworkr1v", "SkyworkR1VChatModel"),
358 | 358 |     "Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"),  # noqa: E501
359 | 359 |     "TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"),  # noqa: E501
360 | 360 |     "Tarsier2ForConditionalGeneration": (
361 | 361 |         "qwen2_vl",
362 | 362 |         "Tarsier2ForConditionalGeneration",
363 |     | -     ),  # noqa: E501
    | 363 | +     ),
364 | 364 |     "UltravoxModel": ("ultravox", "UltravoxModel"),
365 | 365 |     "VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"),  # noqa: E501
366 | 366 |     # [Encoder-decoder]

401 | 401 |     "TransformersMoEForMultimodalLM": (
402 | 402 |         "transformers_moe",
403 | 403 |         "TransformersMoEForMultimodalLM",
404 |     | -     ),  # noqa: E501
    | 404 | +     ),
405 | 405 |     "TransformersEmbeddingModel": (
406 | 406 |         "transformers_pooling",
407 | 407 |         "TransformersEmbeddingModel",
408 |     | -     ),  # noqa: E501
    | 408 | +     ),
409 | 409 |     "TransformersForSequenceClassification": (
410 | 410 |         "transformers_pooling",
411 | 411 |         "TransformersForSequenceClassification",
412 |     | -     ),  # noqa: E501
    | 412 | +     ),
413 | 413 |     "TransformersMoEForSequenceClassification": (
414 | 414 |         "transformers_pooling",
415 | 415 |         "TransformersMoEForSequenceClassification",
416 |     | -     ),  # noqa: E501
    | 416 | +     ),
417 | 417 |     "TransformersMoEEmbeddingModel": (
418 | 418 |         "transformers_pooling",
419 | 419 |         "TransformersMoEEmbeddingModel",
420 |     | -     ),  # noqa: E501
    | 420 | +     ),
421 | 421 | }
422 | 422 |
423 | 423 | _VLLM_MODELS = {
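For readers skimming the diff: each entry in these tables maps a Hugging Face `architectures` name to a `(module, class)` pair, and the model class is only imported when that architecture is actually requested. Below is a minimal sketch of that lazy lookup, assuming the modules live under `vllm.model_executor.models`; the `_EXAMPLE_MODELS` dict and `resolve_model_cls` helper are illustrative only, not the registry's real API.

```python
import importlib

# Same shape as the entries in the diff: architecture name -> (module, class name).
# Illustrative subset; not the full registry.
_EXAMPLE_MODELS = {
    "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
    "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),
}


def resolve_model_cls(architecture: str):
    """Import the backing module on demand and return the model class.

    Hypothetical helper for illustration; the actual registry layers caching
    and plugin/out-of-tree model handling on top of this idea.
    """
    module_name, class_name = _EXAMPLE_MODELS[architecture]
    module = importlib.import_module(f"vllm.model_executor.models.{module_name}")
    return getattr(module, class_name)
```

Because the lookup only reads the tuple contents, the noqa cleanup in this diff is purely cosmetic: the entries themselves are unchanged, so resolution behaves identically before and after.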