@@ -131,262 +131,8 @@ def filter_english_datasets():
131131 return sorted (english_datasets )
132132
133133
134- def list_datasets (template_collection , _priority_filter , _priority_max_templates , _state ):
134+ def list_datasets (template_collection , _state ):
135135 """Get all the datasets to work with."""
136136 dataset_list = filter_english_datasets ()
137- count_dict = template_collection .get_templates_count ()
138- if _priority_filter :
139- dataset_list = list (
140- set (dataset_list )
141- - set (
142- list (
143- d
144- for d in count_dict
145- if count_dict [d ] > _priority_max_templates and d != _state .working_priority_ds
146- )
147- )
148- )
149- dataset_list .sort ()
150- else :
151- dataset_list .sort (key = lambda x : DATASET_ORDER .get (x , 1000 ))
137+ dataset_list .sort (key = lambda x : x .lower ())
152138 return dataset_list
153-
154-
155- DATASET_ORDER = dict (
156- [
157- ("glue" , 0 ),
158- ("squad" , 1 ),
159- ("bookcorpusopen" , 2 ),
160- ("wikipedia" , 3 ),
161- ("wikitext" , 4 ),
162- ("imdb" , 5 ),
163- ("super_glue" , 6 ),
164- ("cnn_dailymail" , 7 ),
165- ("openwebtext" , 8 ),
166- ("common_voice" , 9 ),
167- ("xsum" , 10 ),
168- ("wmt16" , 11 ),
169- ("conll2003" , 12 ),
170- ("ag_news" , 13 ),
171- ("universal_dependencies" , 14 ),
172- ("wiki_qa" , 15 ),
173- ("bookcorpus" , 16 ),
174- ("wiki40b" , 17 ),
175- ("wiki_dpr" , 18 ),
176- ("xnli" , 19 ),
177- ("squad_kor_v1" , 20 ),
178- ("emotion" , 21 ),
179- ("wikiann" , 22 ),
180- ("amazon_us_reviews" , 23 ),
181- ("squad_v2" , 24 ),
182- ("amazon_reviews_multi" , 25 ),
183- ("librispeech_asr" , 26 ),
184- ("blimp" , 27 ),
185- ("scitail" , 28 ),
186- ("anli" , 29 ),
187- ("samsum" , 30 ),
188- ("lambada" , 31 ),
189- ("multi_nli" , 32 ),
190- ("daily_dialog" , 33 ),
191- ("snli" , 34 ),
192- ("opus_euconst" , 35 ),
193- ("rotten_tomatoes" , 36 ),
194- ("scientific_papers" , 37 ),
195- ("trec" , 38 ),
196- ("reddit_tifu" , 39 ),
197- ("ai2_arc" , 40 ),
198- ("patrickvonplaten" , 41 ),
199- ("gigaword" , 42 ),
200- ("swag" , 43 ),
201- ("timit_asr" , 44 ),
202- ("oscar" , 45 ),
203- ("tweet_eval" , 46 ),
204- ("newsgroup" , 47 ),
205- ("billsum" , 48 ),
206- ("gem" , 49 ),
207- ("blended_skill_talk" , 50 ),
208- ("eli5" , 51 ),
209- ("ade_corpus_v2" , 52 ),
210- ("race" , 53 ),
211- ("wikihow" , 54 ),
212- ("piqa" , 55 ),
213- ("xtreme" , 56 ),
214- ("commonsense_qa" , 57 ),
215- ("wiki_snippets" , 58 ),
216- ("mlsum" , 59 ),
217- ("multi_news" , 60 ),
218- ("wmt14" , 61 ),
219- ("asnq" , 62 ),
220- ("toriving" , 63 ),
221- ("crime_and_punish" , 64 ),
222- ("few_rel" , 65 ),
223- ("code_search_net" , 66 ),
224- ("universal_morphologies" , 67 ),
225- ("ms_marco" , 68 ),
226- ("trivia_qa" , 69 ),
227- ("lama" , 70 ),
228- ("newsroom" , 71 ),
229- ("hellaswag" , 72 ),
230- ("adversarial_qa" , 73 ),
231- ("hatexplain" , 74 ),
232- ("hans" , 75 ),
233- ("kilt_tasks" , 76 ),
234- ("xglue" , 77 ),
235- ("amazon_polarity" , 78 ),
236- ("meta_woz" , 79 ),
237- ("opus_books" , 80 ),
238- ("wmt18" , 81 ),
239- ("covid_qa_deepset" , 82 ),
240- ("emotion\\ dataset_infos.json" , 83 ),
241- ("wmt19" , 84 ),
242- ("discofuse" , 85 ),
243- ("mrqa" , 86 ),
244- ("winogrande" , 87 ),
245- ("go_emotions" , 88 ),
246- ("tydiqa" , 89 ),
247- ("yelp_polarity" , 90 ),
248- ("banking77" , 91 ),
249- ("math_dataset" , 92 ),
250- ("pubmed_qa" , 93 ),
251- ("opus_ubuntu" , 94 ),
252- ("acronym_identification" , 95 ),
253- ("math_qa" , 96 ),
254- ("babi_qa" , 97 ),
255- ("dbpedia_14" , 98 ),
256- ("ted_multi" , 99 ),
257- ("allocine" , 100 ),
258- ("hotpot_qa" , 101 ),
259- ("cc_news" , 102 ),
260- ("conll2002" , 103 ),
261- ("cuad" , 104 ),
262- ("mc_taco" , 105 ),
263- ("silicone" , 106 ),
264- ("discovery" , 107 ),
265- ("mt_eng_vietnamese" , 108 ),
266- ("quac" , 109 ),
267- ("conllpp" , 110 ),
268- ("ubuntu_dialogs_corpus" , 111 ),
269- ("esnli" , 112 ),
270- ("doc2dial" , 113 ),
271- ("squad_kor_v2" , 114 ),
272- ("opus_gnome" , 115 ),
273- ("german_legal_entity_recognition" , 116 ),
274- ("openbookqa" , 117 ),
275- ("tapaco" , 118 ),
276- ("xquad_r" , 119 ),
277- ("imdb\\ dataset_infos.json" , 120 ),
278- ("opus_wikipedia" , 121 ),
279- ("amr" , 122 ),
280- ("wnut_17" , 123 ),
281- ("empathetic_dialogues" , 124 ),
282- ("cbt" , 125 ),
283- ("opus_rf" , 126 ),
284- ("narrativeqa" , 127 ),
285- ("mnist" , 128 ),
286- ("sick" , 129 ),
287- ("swda" , 130 ),
288- ("aeslc" , 131 ),
289- ("art" , 132 ),
290- ("coqa" , 133 ),
291- ("opus100" , 134 ),
292- ("sst" , 135 ),
293- ("big_patent" , 136 ),
294- ("germeval_14" , 137 ),
295- ("liar" , 138 ),
296- ("un_pc" , 139 ),
297- ("alt" , 140 ),
298- ("circa" , 141 ),
299- ("scan" , 142 ),
300- ("wikisql" , 143 ),
301- ("reddit" , 144 ),
302- ("wino_bias" , 145 ),
303- ("financial_phrasebank" , 146 ),
304- ("social_i_qa" , 147 ),
305- ("newsqa" , 148 ),
306- ("cosmos_qa" , 149 ),
307- ("classla" , 150 ),
308- ("scicite" , 151 ),
309- ("codah" , 152 ),
310- ("ehealth_kd" , 153 ),
311- ("wikicorpus" , 154 ),
312- ("ccaligned_multilingual" , 155 ),
313- ("cos_e" , 156 ),
314- ("thaisum" , 157 ),
315- ("cfq" , 158 ),
316- ("yahoo_answers_topics" , 159 ),
317- ("wmt" , 160 ),
318- ("natural_questions" , 161 ),
319- ("cc100" , 162 ),
320- ("paws" , 163 ),
321- ("boolq" , 164 ),
322- ("break_data" , 165 ),
323- ("pragmeval" , 166 ),
324- ("arabic_speech_corpus" , 167 ),
325- ("text\\ dataset_infos.json" , 168 ),
326- ("md_gender_bias" , 169 ),
327- ("mlqa" , 170 ),
328- ("arabic_billion_words" , 171 ),
329- ("dialog_re" , 172 ),
330- ("tweets_hate_speech_detection" , 173 ),
331- ("ecthr_cases" , 174 ),
332- ("json\\ dataset_infos.json" , 175 ),
333- ("conv_ai_2" , 176 ),
334- ("dream" , 177 ),
335- ("kor_ner" , 178 ),
336- ("youtube_caption_corrections" , 179 ),
337- ("spider" , 180 ),
338- ("air_dialogue" , 181 ),
339- ("arxiv_dataset" , 182 ),
340- ("data" , 183 ),
341- ("quora" , 184 ),
342- ("docred" , 185 ),
343- ("guardian_authorship" , 186 ),
344- ("quartz" , 187 ),
345- ("yelp_review_full" , 188 ),
346- ("xquad" , 189 ),
347- ("ted_talks_iwslt" , 190 ),
348- ("orange_sum" , 191 ),
349- ("indonlu" , 192 ),
350- ("tweet_qa" , 193 ),
351- ("multi_woz_v22" , 194 ),
352- ("s2orc" , 195 ),
353- ("clarin-pl" , 196 ),
354- ("cord19" , 197 ),
355- ("emo" , 198 ),
356- ("indic_glue" , 199 ),
357- ("ethos" , 200 ),
358- ("persiannlp" , 201 ),
359- ("totto" , 202 ),
360- ("wongnai_reviews" , 203 ),
361- ("bavard" , 204 ),
362- ("europa_ecdc_tm" , 205 ),
363- ("google_wellformed_query" , 206 ),
364- ("paws-x" , 207 ),
365- ("emea" , 208 ),
366- ("fever" , 209 ),
367- ("asset" , 210 ),
368- ("kilt_wikipedia" , 211 ),
369- ("clinc_oos" , 212 ),
370- ("conv_ai_3" , 213 ),
371- ("ncbi_disease" , 214 ),
372- ("sentiment140" , 215 ),
373- ("quarel" , 216 ),
374- ("txt" , 217 ),
375- ("ajgt_twitter_ar" , 218 ),
376- ("ambig_qa" , 219 ),
377- ("ptb_text_only" , 220 ),
378- ("stsb_multi_mt" , 221 ),
379- ("web_questions" , 222 ),
380- ("winograd_wsc" , 223 ),
381- ("eurlex" , 224 ),
382- ("muchocine" , 225 ),
383- ("app_reviews" , 226 ),
384- ("aqua_rat" , 227 ),
385- ("bible_para" , 228 ),
386- ("wiki_auto" , 229 ),
387- ("cifar10" , 230 ),
388- ("eli5\\ dataset_infos.json" , 231 ),
389- ("quail" , 232 ),
390- ("hyperpartisan_news_detection" , 233 ),
391- ]
392- )
0 commit comments