@@ -47,20 +47,24 @@ const taskData: TaskDataCustom = {
4747 id : "meta-llama/Llama-3.2-11B-Vision-Instruct" ,
4848 } ,
4949 {
50- description : "Cutting-edge conversational vision language model that can take multiple image inputs ." ,
51- id : "HuggingFaceM4/idefics2-8b-chatty " ,
50+ description : "Cutting-edge vision language models ." ,
51+ id : "allenai/Molmo-7B-D-0924 " ,
5252 } ,
5353 {
5454 description : "Small yet powerful model." ,
5555 id : "vikhyatk/moondream2" ,
5656 } ,
5757 {
58- description : "Strong image-text-to-text model made to understand documents ." ,
59- id : "mPLUG/DocOwl1.5 " ,
58+ description : "Strong image-text-to-text model." ,
59+ id : "Qwen/Qwen2-VL-7B-Instruct " ,
6060 } ,
6161 {
6262 description : "Strong image-text-to-text model." ,
63- id : "microsoft/Phi-3.5-vision-instruct" ,
63+ id : "mistralai/Pixtral-12B-2409" ,
64+ } ,
65+ {
66+ description : "Strong image-text-to-text model focused on documents." ,
67+ id : "stepfun-ai/GOT-OCR2_0" ,
6468 } ,
6569 ] ,
6670 spaces : [
@@ -74,15 +78,19 @@ const taskData: TaskDataCustom = {
7478 } ,
7579 {
7680 description : "Powerful vision-language model assistant." ,
77- id : "liuhaotian/LLaVA-1.6" ,
81+ id : "akhaliq/Molmo-7B-D-0924" ,
82+ } ,
83+ {
84+ description : "An image-text-to-text application focused on documents." ,
85+ id : "stepfun-ai/GOT_official_online_demo" ,
7886 } ,
7987 {
8088 description : "An application to compare outputs of different vision language models." ,
8189 id : "merve/compare_VLMs" ,
8290 } ,
8391 {
84- description : "An application for document vision language tasks ." ,
85- id : "mPLUG/DocOwl " ,
92+ description : "An application for chatting with an image-text-to-text model ." ,
93+ id : "GanymedeNil/Qwen2-VL-7B " ,
8694 } ,
8795 ] ,
8896 summary :
0 commit comments