Skip to content

Commit 8a518eb

Browse files
shanyas10 and Shanya Sharma - s0s0cr3
authored
Remove priority filter (#517)
* templates for tweets_hate_speech
* Update templates.yaml
* Create templates.yaml
* deleting tweets template
* removing priority filter
* quality changes
* Update utils.py

Co-authored-by: Shanya Sharma - s0s0cr3 <[email protected]>
1 parent 420e8ed commit 8a518eb

File tree

2 files changed

+2
-283
lines changed

2 files changed

+2
-283
lines changed

promptsource/app.py

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -216,27 +216,9 @@ def get_infos(d_name):
216216
#
217217
# Loads dataset information
218218
#
219-
if mode == "Prompted dataset viewer":
220-
priority_filter = False
221-
priority_max_templates = None
222-
else: # mode = Sourcing
223-
priority_filter = st.sidebar.checkbox(
224-
"Filter Priority Datasets", help="This filter surfaces datasets with less than X prompts."
225-
)
226-
if priority_filter:
227-
priority_max_templates = st.sidebar.number_input(
228-
"Max no of templates per dataset", min_value=1, max_value=50, value=2, step=1
229-
)
230-
else:
231-
# Clear working priority dataset retained in the
232-
# priority list with more than priority_max_templates
233-
state.working_priority_ds = None
234-
priority_max_templates = None
235219

236220
dataset_list = list_datasets(
237221
template_collection,
238-
priority_filter,
239-
priority_max_templates,
240222
state,
241223
)
242224

@@ -251,12 +233,6 @@ def get_infos(d_name):
251233
help="Select the dataset to work on.",
252234
)
253235

254-
if mode == "Sourcing":
255-
# On dataset change, clear working priority dataset
256-
# retained in the priority list with more than priority_max_templates
257-
if dataset_key != state.working_priority_ds:
258-
state.working_priority_ds = None
259-
260236
#
261237
# If a particular dataset is selected, loads dataset and template information
262238
#
@@ -447,9 +423,6 @@ def get_infos(d_name):
447423
dataset_templates.add_template(template)
448424
reset_template_state()
449425
state.template_name = new_template_name
450-
# Keep the current working dataset in priority list
451-
if priority_filter:
452-
state.working_priority_ds = dataset_key
453426
else:
454427
state.new_template_name = None
455428

promptsource/utils.py

Lines changed: 2 additions & 256 deletions
Original file line numberDiff line numberDiff line change
@@ -131,262 +131,8 @@ def filter_english_datasets():
131131
return sorted(english_datasets)
132132

133133

134-
def list_datasets(template_collection, _priority_filter, _priority_max_templates, _state):
134+
def list_datasets(template_collection, _state):
135135
"""Get all the datasets to work with."""
136136
dataset_list = filter_english_datasets()
137-
count_dict = template_collection.get_templates_count()
138-
if _priority_filter:
139-
dataset_list = list(
140-
set(dataset_list)
141-
- set(
142-
list(
143-
d
144-
for d in count_dict
145-
if count_dict[d] > _priority_max_templates and d != _state.working_priority_ds
146-
)
147-
)
148-
)
149-
dataset_list.sort()
150-
else:
151-
dataset_list.sort(key=lambda x: DATASET_ORDER.get(x, 1000))
137+
dataset_list.sort(key=lambda x: x.lower())
152138
return dataset_list
153-
154-
155-
DATASET_ORDER = dict(
156-
[
157-
("glue", 0),
158-
("squad", 1),
159-
("bookcorpusopen", 2),
160-
("wikipedia", 3),
161-
("wikitext", 4),
162-
("imdb", 5),
163-
("super_glue", 6),
164-
("cnn_dailymail", 7),
165-
("openwebtext", 8),
166-
("common_voice", 9),
167-
("xsum", 10),
168-
("wmt16", 11),
169-
("conll2003", 12),
170-
("ag_news", 13),
171-
("universal_dependencies", 14),
172-
("wiki_qa", 15),
173-
("bookcorpus", 16),
174-
("wiki40b", 17),
175-
("wiki_dpr", 18),
176-
("xnli", 19),
177-
("squad_kor_v1", 20),
178-
("emotion", 21),
179-
("wikiann", 22),
180-
("amazon_us_reviews", 23),
181-
("squad_v2", 24),
182-
("amazon_reviews_multi", 25),
183-
("librispeech_asr", 26),
184-
("blimp", 27),
185-
("scitail", 28),
186-
("anli", 29),
187-
("samsum", 30),
188-
("lambada", 31),
189-
("multi_nli", 32),
190-
("daily_dialog", 33),
191-
("snli", 34),
192-
("opus_euconst", 35),
193-
("rotten_tomatoes", 36),
194-
("scientific_papers", 37),
195-
("trec", 38),
196-
("reddit_tifu", 39),
197-
("ai2_arc", 40),
198-
("patrickvonplaten", 41),
199-
("gigaword", 42),
200-
("swag", 43),
201-
("timit_asr", 44),
202-
("oscar", 45),
203-
("tweet_eval", 46),
204-
("newsgroup", 47),
205-
("billsum", 48),
206-
("gem", 49),
207-
("blended_skill_talk", 50),
208-
("eli5", 51),
209-
("ade_corpus_v2", 52),
210-
("race", 53),
211-
("wikihow", 54),
212-
("piqa", 55),
213-
("xtreme", 56),
214-
("commonsense_qa", 57),
215-
("wiki_snippets", 58),
216-
("mlsum", 59),
217-
("multi_news", 60),
218-
("wmt14", 61),
219-
("asnq", 62),
220-
("toriving", 63),
221-
("crime_and_punish", 64),
222-
("few_rel", 65),
223-
("code_search_net", 66),
224-
("universal_morphologies", 67),
225-
("ms_marco", 68),
226-
("trivia_qa", 69),
227-
("lama", 70),
228-
("newsroom", 71),
229-
("hellaswag", 72),
230-
("adversarial_qa", 73),
231-
("hatexplain", 74),
232-
("hans", 75),
233-
("kilt_tasks", 76),
234-
("xglue", 77),
235-
("amazon_polarity", 78),
236-
("meta_woz", 79),
237-
("opus_books", 80),
238-
("wmt18", 81),
239-
("covid_qa_deepset", 82),
240-
("emotion\\dataset_infos.json", 83),
241-
("wmt19", 84),
242-
("discofuse", 85),
243-
("mrqa", 86),
244-
("winogrande", 87),
245-
("go_emotions", 88),
246-
("tydiqa", 89),
247-
("yelp_polarity", 90),
248-
("banking77", 91),
249-
("math_dataset", 92),
250-
("pubmed_qa", 93),
251-
("opus_ubuntu", 94),
252-
("acronym_identification", 95),
253-
("math_qa", 96),
254-
("babi_qa", 97),
255-
("dbpedia_14", 98),
256-
("ted_multi", 99),
257-
("allocine", 100),
258-
("hotpot_qa", 101),
259-
("cc_news", 102),
260-
("conll2002", 103),
261-
("cuad", 104),
262-
("mc_taco", 105),
263-
("silicone", 106),
264-
("discovery", 107),
265-
("mt_eng_vietnamese", 108),
266-
("quac", 109),
267-
("conllpp", 110),
268-
("ubuntu_dialogs_corpus", 111),
269-
("esnli", 112),
270-
("doc2dial", 113),
271-
("squad_kor_v2", 114),
272-
("opus_gnome", 115),
273-
("german_legal_entity_recognition", 116),
274-
("openbookqa", 117),
275-
("tapaco", 118),
276-
("xquad_r", 119),
277-
("imdb\\dataset_infos.json", 120),
278-
("opus_wikipedia", 121),
279-
("amr", 122),
280-
("wnut_17", 123),
281-
("empathetic_dialogues", 124),
282-
("cbt", 125),
283-
("opus_rf", 126),
284-
("narrativeqa", 127),
285-
("mnist", 128),
286-
("sick", 129),
287-
("swda", 130),
288-
("aeslc", 131),
289-
("art", 132),
290-
("coqa", 133),
291-
("opus100", 134),
292-
("sst", 135),
293-
("big_patent", 136),
294-
("germeval_14", 137),
295-
("liar", 138),
296-
("un_pc", 139),
297-
("alt", 140),
298-
("circa", 141),
299-
("scan", 142),
300-
("wikisql", 143),
301-
("reddit", 144),
302-
("wino_bias", 145),
303-
("financial_phrasebank", 146),
304-
("social_i_qa", 147),
305-
("newsqa", 148),
306-
("cosmos_qa", 149),
307-
("classla", 150),
308-
("scicite", 151),
309-
("codah", 152),
310-
("ehealth_kd", 153),
311-
("wikicorpus", 154),
312-
("ccaligned_multilingual", 155),
313-
("cos_e", 156),
314-
("thaisum", 157),
315-
("cfq", 158),
316-
("yahoo_answers_topics", 159),
317-
("wmt", 160),
318-
("natural_questions", 161),
319-
("cc100", 162),
320-
("paws", 163),
321-
("boolq", 164),
322-
("break_data", 165),
323-
("pragmeval", 166),
324-
("arabic_speech_corpus", 167),
325-
("text\\dataset_infos.json", 168),
326-
("md_gender_bias", 169),
327-
("mlqa", 170),
328-
("arabic_billion_words", 171),
329-
("dialog_re", 172),
330-
("tweets_hate_speech_detection", 173),
331-
("ecthr_cases", 174),
332-
("json\\dataset_infos.json", 175),
333-
("conv_ai_2", 176),
334-
("dream", 177),
335-
("kor_ner", 178),
336-
("youtube_caption_corrections", 179),
337-
("spider", 180),
338-
("air_dialogue", 181),
339-
("arxiv_dataset", 182),
340-
("data", 183),
341-
("quora", 184),
342-
("docred", 185),
343-
("guardian_authorship", 186),
344-
("quartz", 187),
345-
("yelp_review_full", 188),
346-
("xquad", 189),
347-
("ted_talks_iwslt", 190),
348-
("orange_sum", 191),
349-
("indonlu", 192),
350-
("tweet_qa", 193),
351-
("multi_woz_v22", 194),
352-
("s2orc", 195),
353-
("clarin-pl", 196),
354-
("cord19", 197),
355-
("emo", 198),
356-
("indic_glue", 199),
357-
("ethos", 200),
358-
("persiannlp", 201),
359-
("totto", 202),
360-
("wongnai_reviews", 203),
361-
("bavard", 204),
362-
("europa_ecdc_tm", 205),
363-
("google_wellformed_query", 206),
364-
("paws-x", 207),
365-
("emea", 208),
366-
("fever", 209),
367-
("asset", 210),
368-
("kilt_wikipedia", 211),
369-
("clinc_oos", 212),
370-
("conv_ai_3", 213),
371-
("ncbi_disease", 214),
372-
("sentiment140", 215),
373-
("quarel", 216),
374-
("txt", 217),
375-
("ajgt_twitter_ar", 218),
376-
("ambig_qa", 219),
377-
("ptb_text_only", 220),
378-
("stsb_multi_mt", 221),
379-
("web_questions", 222),
380-
("winograd_wsc", 223),
381-
("eurlex", 224),
382-
("muchocine", 225),
383-
("app_reviews", 226),
384-
("aqua_rat", 227),
385-
("bible_para", 228),
386-
("wiki_auto", 229),
387-
("cifar10", 230),
388-
("eli5\\dataset_infos.json", 231),
389-
("quail", 232),
390-
("hyperpartisan_news_detection", 233),
391-
]
392-
)

0 commit comments

Comments (0)