2 files changed: +15 -6 lines changed
Original file line number Diff line number Diff line change
@@ -104,6 +104,14 @@ def download_images_and_create_json(
104104 "subset" : False ,
105105 "split" : "test" ,
106106 },
107+ "NewYorker" : {
108+ "path" : "jmhessel/newyorker_caption_contest" ,
109+ "image_key" : "image" ,
110+ "question_key" : "questions" ,
111+ "id_key" : "index" ,
112+ "subset" : "explanation" ,
113+ "split" : "train" ,
114+ },
107115 }
108116
109117 download_images_and_create_json (
@@ -114,7 +122,7 @@ def download_images_and_create_json(
114122 with open (f"{ args .output_dir } /{ dataset_name } /data.json" ) as f :
115123 data = json .load (f )
116124 print (f"Dataset: { dataset_name } , Number of examples: { len (data )} " )
117- dataset_json .extend (np . random . choice ( data , 500 ) )
125+ dataset_json .extend (data )
118126
119127 with open (f"{ args .output_dir } /metadata_sampled.json" , "w" ) as f :
120128 json .dump (dataset_json , f , indent = 4 )
Original file line number Diff line number Diff line change
1717 args = parser .parse_args ()
1818
1919 dataset_prop = {
20- "DocVQA" : 500 ,
21- "ChartQA" : 500 ,
22- "NewYorker" : 1000 ,
23- "WikiArt" : 500 ,
24- "TextVQA" : 500 ,
20+ "realworldqa" : 500 ,
21+ "Memes" : 500 ,
22+ "Floorplan" : 500 ,
23+ "Website" : 500 ,
24+ "IllusionVQA" : 500 ,
25+ "NewYorker" : 500 ,
2526 }
2627
2728 dataset_json = []
You can’t perform that action at this time.
0 commit comments