@@ -326,6 +326,45 @@ def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
+def load_kimi_vl(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "moonshotai/Kimi-VL-A3B-Instruct"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=4096,
+        max_num_seqs=4,
+        tensor_parallel_size=1,
+        limit_mm_per_prompt={"image": len(image_urls)},
+        trust_remote_code=True,
+    )
+
+    placeholders = [{"type": "image", "image": url} for url in image_urls]
+    messages = [{
+        "role":
+        "user",
+        "content": [
+            *placeholders,
+            {
+                "type": "text",
+                "text": question
+            },
+        ],
+    }]
+
+    processor = AutoProcessor.from_pretrained(model_name,
+                                              trust_remote_code=True)
+
+    prompt = processor.apply_chat_template(messages,
+                                           tokenize=False,
+                                           add_generation_prompt=True)
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=[fetch_image(url) for url in image_urls],
+    )
+
+
 def load_mistral3(question: str, image_urls: list[str]) -> ModelRequestData:
     model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
 
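For context, every loader in this file returns a ModelRequestData that the
example's main routine turns into an engine and a single multi-image generate
call. A minimal sketch of that flow (not part of the diff; the question text,
IMAGE_URLS, and the sampling settings are illustrative placeholders):

    from dataclasses import asdict
    from vllm import LLM, SamplingParams

    # Build the request exactly as the example harness would.
    req = load_kimi_vl("What do these images have in common?", IMAGE_URLS)

    # EngineArgs is a dataclass, so its fields map directly onto LLM(**...).
    llm = LLM(**asdict(req.engine_args))

    # Pass the pre-templated prompt together with the fetched images.
    outputs = llm.generate(
        {
            "prompt": req.prompt,
            "multi_modal_data": {"image": req.image_data},
        },
        sampling_params=SamplingParams(max_tokens=128),
    )
    print(outputs[0].outputs[0].text)
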
@@ -640,6 +679,7 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData:
     "h2ovl_chat": load_h2ovl,
     "idefics3": load_idefics3,
     "internvl_chat": load_internvl,
+    "kimi_vl": load_kimi_vl,
     "llama4": load_llama4,
     "mistral3": load_mistral3,
     "mllama": load_mllama,