@@ -371,13 +371,14 @@ def load_internvl(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
-def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
-    model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
+def load_keye_vl(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "Kwai-Keye/Keye-VL-8B-Preview"
 
     engine_args = EngineArgs(
         model=model_name,
-        max_model_len=131072,
-        tensor_parallel_size=8,
+        trust_remote_code=True,
+        max_model_len=8192,
+        max_num_seqs=5,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
 
@@ -389,29 +390,32 @@ def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
                 *placeholders,
                 {"type": "text", "text": question},
             ],
-        }
+        },
     ]
 
-    processor = AutoProcessor.from_pretrained(model_name)
+    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
 
     prompt = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
 
+    image_data = [fetch_image(url) for url in image_urls]
+
     return ModelRequestData(
         engine_args=engine_args,
         prompt=prompt,
-        image_data=[fetch_image(url) for url in image_urls],
+        image_data=image_data,
     )
 
 
-def load_llava(question: str, image_urls: list[str]) -> ModelRequestData:
-    # NOTE: CAUTION! Original Llava models wasn't really trained on multi-image inputs,
-    # it will generate poor response for multi-image inputs!
-    model_name = "llava-hf/llava-1.5-7b-hf"
+def load_keye_vl1_5(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "Kwai-Keye/Keye-VL-1_5-8B"
+
     engine_args = EngineArgs(
         model=model_name,
-        max_num_seqs=16,
+        trust_remote_code=True,
+        max_model_len=32768,
+        max_num_seqs=5,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
 
@@ -423,28 +427,32 @@ def load_llava(question: str, image_urls: list[str]) -> ModelRequestData:
                 *placeholders,
                 {"type": "text", "text": question},
             ],
-        }
+        },
     ]
 
-    processor = AutoProcessor.from_pretrained(model_name)
+    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
 
     prompt = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
 
+    image_data = [fetch_image(url) for url in image_urls]
+
     return ModelRequestData(
         engine_args=engine_args,
         prompt=prompt,
-        image_data=[fetch_image(url) for url in image_urls],
+        image_data=image_data,
     )
 
 
-def load_llava_next(question: str, image_urls: list[str]) -> ModelRequestData:
-    model_name = "llava-hf/llava-v1.6-mistral-7b-hf"
+def load_kimi_vl(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "moonshotai/Kimi-VL-A3B-Instruct"
+
     engine_args = EngineArgs(
         model=model_name,
-        max_model_len=8192,
-        max_num_seqs=16,
+        trust_remote_code=True,
+        max_model_len=4096,
+        max_num_seqs=4,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
 
@@ -459,7 +467,7 @@ def load_llava_next(question: str, image_urls: list[str]) -> ModelRequestData:
         }
     ]
 
-    processor = AutoProcessor.from_pretrained(model_name)
+    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
 
     prompt = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
@@ -472,12 +480,13 @@ def load_llava_next(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
-def load_llava_onevision(question: str, image_urls: list[str]) -> ModelRequestData:
-    model_name = "llava-hf/llava-onevision-qwen2-7b-ov-hf"
+def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
+
     engine_args = EngineArgs(
         model=model_name,
-        max_model_len=16384,
-        max_num_seqs=16,
+        max_model_len=131072,
+        tensor_parallel_size=8,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
 
@@ -505,14 +514,13 @@ def load_llava_onevision(question: str, image_urls: list[str]) -> ModelRequestData:
     )
 
 
-def load_keye_vl(question: str, image_urls: list[str]) -> ModelRequestData:
-    model_name = "Kwai-Keye/Keye-VL-8B-Preview"
-
+def load_llava(question: str, image_urls: list[str]) -> ModelRequestData:
+    # NOTE: CAUTION! Original Llava models wasn't really trained on multi-image inputs,
+    # it will generate poor response for multi-image inputs!
+    model_name = "llava-hf/llava-1.5-7b-hf"
     engine_args = EngineArgs(
         model=model_name,
-        trust_remote_code=True,
-        max_model_len=8192,
-        max_num_seqs=5,
+        max_num_seqs=16,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
 
@@ -524,32 +532,28 @@ def load_keye_vl(question: str, image_urls: list[str]) -> ModelRequestData:
                 *placeholders,
                 {"type": "text", "text": question},
             ],
-        },
+        }
     ]
 
-    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+    processor = AutoProcessor.from_pretrained(model_name)
 
     prompt = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
 
-    image_data = [fetch_image(url) for url in image_urls]
-
     return ModelRequestData(
         engine_args=engine_args,
         prompt=prompt,
-        image_data=image_data,
+        image_data=[fetch_image(url) for url in image_urls],
     )
 
 
-def load_keye_vl1_5(question: str, image_urls: list[str]) -> ModelRequestData:
-    model_name = "Kwai-Keye/Keye-VL-1_5-8B"
-
+def load_llava_next(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "llava-hf/llava-v1.6-mistral-7b-hf"
     engine_args = EngineArgs(
         model=model_name,
-        trust_remote_code=True,
-        max_model_len=32768,
-        max_num_seqs=5,
+        max_model_len=8192,
+        max_num_seqs=16,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
 
@@ -561,32 +565,28 @@ def load_keye_vl1_5(question: str, image_urls: list[str]) -> ModelRequestData:
                 *placeholders,
                 {"type": "text", "text": question},
             ],
-        },
+        }
     ]
 
-    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+    processor = AutoProcessor.from_pretrained(model_name)
 
     prompt = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
 
-    image_data = [fetch_image(url) for url in image_urls]
-
     return ModelRequestData(
         engine_args=engine_args,
         prompt=prompt,
-        image_data=image_data,
+        image_data=[fetch_image(url) for url in image_urls],
     )
 
 
-def load_kimi_vl(question: str, image_urls: list[str]) -> ModelRequestData:
-    model_name = "moonshotai/Kimi-VL-A3B-Instruct"
-
+def load_llava_onevision(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "llava-hf/llava-onevision-qwen2-7b-ov-hf"
     engine_args = EngineArgs(
         model=model_name,
-        trust_remote_code=True,
-        max_model_len=4096,
-        max_num_seqs=4,
+        max_model_len=16384,
+        max_num_seqs=16,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
 
@@ -601,7 +601,7 @@ def load_kimi_vl(question: str, image_urls: list[str]) -> ModelRequestData:
         }
     ]
 
-    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+    processor = AutoProcessor.from_pretrained(model_name)
 
     prompt = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
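For context on how these loaders are consumed: each load_* function above returns a ModelRequestData whose fields feed vLLM's offline API directly. Below is a minimal driver sketch, not part of this diff, assuming the file's ModelRequestData dataclass and its fetch_image-populated image_data; the run_multi_image name and the sampling settings are illustrative only.

# Hypothetical driver for the loaders above -- a sketch, not code from this PR.
from dataclasses import asdict

from vllm import LLM, SamplingParams


def run_multi_image(load_fn, question: str, image_urls: list[str]) -> None:
    req_data = load_fn(question, image_urls)

    # EngineArgs is a dataclass, so its fields splat into the LLM constructor.
    llm = LLM(**asdict(req_data.engine_args))

    outputs = llm.generate(
        {
            "prompt": req_data.prompt,
            # The pre-fetched PIL images ride alongside the templated prompt.
            "multi_modal_data": {"image": req_data.image_data},
        },
        sampling_params=SamplingParams(temperature=0.0, max_tokens=128),
    )
    for output in outputs:
        print(output.outputs[0].text)


# Example (illustrative): run_multi_image(load_keye_vl, "Describe both images.", urls)

This mirrors the pattern the example file already uses for single loaders: splat the per-model EngineArgs into LLM, then pass prompt and images together as one multi-modal request.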