@@ -64,53 +64,45 @@ def download_images_and_create_json(
6464 args = parser .parse_args ()
6565
6666 datasets_info = {
67- "DocVQA " : {
68- "path" : "lmms-lab/DocVQA " ,
67+ "realworldqa " : {
68+ "path" : "visheratin/realworldqa " ,
6969 "image_key" : "image" ,
7070 "question_key" : "question" ,
71- "id_key" : "questionId" ,
72- "subset" : "DocVQA" ,
73- "split" : "test" ,
74- },
75- "ChartQA" : {
76- "path" : "HuggingFaceM4/ChartQA" ,
77- "image_key" : "image" ,
78- "question_key" : "query" ,
7971 "id_key" : "index" ,
8072 "subset" : False ,
8173 "split" : "test" ,
8274 },
83- "realworldqa " : {
84- "path" : "visheratin/realworldqa " ,
75+ "Memes " : {
76+ "path" : "not-lain/meme-dataset " ,
8577 "image_key" : "image" ,
86- "question_key" : "question " ,
78+ "question_key" : "name " ,
8779 "id_key" : "index" ,
8880 "subset" : False ,
89- "split" : "test " ,
81+ "split" : "train " ,
9082 },
91- "NewYorker " : {
92- "path" : "jmhessel/newyorker_caption_contest " ,
83+ "Floorplan " : {
84+ "path" : "umesh16071973/Floorplan_Dataset_21022024 " ,
9385 "image_key" : "image" ,
94- "question_key" : "questions " ,
86+ "question_key" : "caption " ,
9587 "id_key" : "index" ,
96- "subset" : "explanation" ,
88+ "subset" : False ,
9789 "split" : "train" ,
9890 },
99- "WikiArt " : {
100- "path" : "huggan/wikiart " ,
91+ "Website " : {
92+ "path" : "Zexanima/website_screenshots_image_dataset " ,
10193 "image_key" : "image" ,
102- "question_key" : "artist " ,
94+ "question_key" : "date_captured " ,
10395 "id_key" : "index" ,
10496 "subset" : False ,
10597 "split" : "train" ,
10698 },
107- "TextVQA " : {
108- "path" : "facebook/textvqa " ,
99+ "IllusionVQA " : {
100+ "path" : "csebuetnlp/illusionVQA-Comprehension " ,
109101 "image_key" : "image" ,
110102 "question_key" : "question" ,
111- "id_key" : "question_id " ,
103+ "id_key" : "index " ,
112104 "subset" : False ,
113- "split" : "train " ,
105+ "split" : "test " ,
114106 },
115107 }
116108
@@ -121,6 +113,7 @@ def download_images_and_create_json(
121113 for dataset_name in datasets_info .keys ():
122114 with open (f"{ args .output_dir } /{ dataset_name } /data.json" ) as f :
123115 data = json .load (f )
116+ print (f"Dataset: { dataset_name } , Number of examples: { len (data )} " )
124117 dataset_json .extend (np .random .choice (data , 500 ))
125118
126119 with open (f"{ args .output_dir } /metadata_sampled.json" , "w" ) as f :
0 commit comments