|
123 | 123 | ("idefics2", "Idefics2ForConditionalGeneration"), |
124 | 124 | ("idefics3", "Idefics3ForConditionalGeneration"), |
125 | 125 | ("llava", "LlavaForConditionalGeneration"), |
| 126 | + ("llava_next", "LlavaNextForConditionalGeneration"), |
| 127 | + ("llava_next_video", "LlavaNextVideoForConditionalGeneration"), |
| 128 | + ("llava_onevision", "LlavaOnevisionForConditionalGeneration"), |
126 | 129 | ("mobilebert", "MobileBertForPreTraining"), |
127 | 130 | ("qwen2_audio", "Qwen2AudioForConditionalGeneration"), |
128 | 131 | ("roberta", "RobertaForMaskedLM"), |
|
214 | 217 | ("ijepa", "IJepaModel"), |
215 | 218 | ("imagegpt", "ImageGPTModel"), |
216 | 219 | ("levit", "LevitModel"), |
| 220 | + ("siglip_vision_model", "SiglipVisionModel"), |
217 | 221 | ] |
218 | 222 | ) |
219 | 223 |
|
|
260 | 264 | ("idefics2", "Idefics2ForConditionalGeneration"), |
261 | 265 | ("idefics3", "Idefics3ForConditionalGeneration"), |
262 | 266 | ("llava", "LlavaForConditionalGeneration"), |
| 267 | + ("llava_next", "LlavaNextForConditionalGeneration"), |
| 268 | + ("llava_next_video", "LlavaNextVideoForConditionalGeneration"), |
| 269 | + ("llava_onevision", "LlavaOnevisionForConditionalGeneration"), |
263 | 270 | ("paligemma", "PaliGemmaForConditionalGeneration"), |
264 | 271 | ("qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"), |
265 | 272 | ("qwen2_vl", "Qwen2VLForConditionalGeneration"), |
|
278 | 285 | ("idefics3", "Idefics3ForConditionalGeneration"), |
279 | 286 | ("fuyu", "FuyuForCausalLM"), |
280 | 287 | ("llava", "LlavaForConditionalGeneration"), |
| 288 | + ("llava_next", "LlavaNextForConditionalGeneration"), |
| 289 | + ("llava_onevision", "LlavaOnevisionForConditionalGeneration"), |
281 | 290 | ("paligemma", "PaliGemmaForConditionalGeneration"), |
282 | 291 | ("qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"), |
283 | 292 | ("qwen2_vl", "Qwen2VLForConditionalGeneration"), |
|
410 | 419 | ("led", "LEDForQuestionAnswering"), |
411 | 420 | ("convbert", "ConvBertForQuestionAnswering"), |
412 | 421 | ("llama", "LlamaForQuestionAnswering"), |
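| 422 | + ("mistral", "MistralForQuestionAnswering"),  # NOTE(review): the identical ("mistral", "MistralForQuestionAnswering") pair already appears at new line 425 below; confirm and keep only one |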
413 | 423 | ("mobilebert", "MobileBertForQuestionAnswering"), |
414 | 424 | ("megatron-bert", "MegatronBertForQuestionAnswering"), |
415 | 425 | ("mistral", "MistralForQuestionAnswering"), |
|
529 | 539 |
|
530 | 540 | MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES = OrderedDict( |
531 | 541 | [ |
| 542 | + # Model for Zero Shot Image Classification mapping |
532 | 543 | ("blip", "BlipModel"), |
533 | 544 | ("siglip", "SiglipModel"), |
534 | 545 | ] |
|
0 commit comments