Skip to content

Commit 1726526

Browse files
author
Marcin Kardas
committed
Refactor manual dictionaries
1 parent d55768e commit 1726526

File tree

2 files changed

+123
-122
lines changed

2 files changed

+123
-122
lines changed

sota_extractor2/models/linking/context_search.py

Lines changed: 1 addition & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -13,135 +13,14 @@
1313

1414
from sota_extractor2.pipeline_logger import pipeline_logger
1515

16-
metrics = {
17-
'BLEU': ['bleu'],
18-
'BLEU score': ['bleu'],
19-
'Character Error Rate': ['cer', 'cers'],
20-
'Error': ['error'],
21-
'Exact Match Ratio': ['exact match'],
22-
'F1': ['f1', 'f1 score'],
23-
'F1 score': ['f1', 'f1 score'],
24-
'MAP': ['map'],
25-
'Percentage error': ['wer', 'per', 'wers', 'pers', 'word error rate', 'word error rates', 'phoneme error rates',
26-
'phoneme error rate', 'error', 'error rate', 'error rates'],
27-
'Word Error Rate': ['wer', 'wers', 'word error rate', 'word error rates', 'error', 'error rate', 'error rates'],
28-
'Word Error Rate (WER)': ['wer', 'wers', 'word error rate', 'word error rates', 'error', 'error rate', 'error rates'],
29-
'ROUGE-1': ['r1'],
30-
'ROUGE-2': ['r2'],
31-
'ROUGE-F': ['rf'],
32-
'Precision': ['precision'],
33-
'Recall': ['recall'],
34-
# RAIN REMOVAL
35-
'PSNR': ['psnr', 'psnr (db)', 'mean psnr'],
36-
'SSIM': ['ssim'],
37-
'UQI': ['uqi'],
38-
'VIF': ['vif'],
39-
'SSEQ': ['sseq'],
40-
'NIQE': ['niqe'],
41-
'BLINDS-II': ['blinds-ii'],
42-
'FSIM': ['fsim'],
43-
# SEMANTIC SEGMENTATION
44-
'Mean iOU': ['miou', 'mean iou', 'mean iu'],
45-
'Pixel Accuracy': ['pixel accuracy', 'pixel acc', 'pixel acc.'],
46-
'Class iOU': ['class iou', 'iou cla.'],
47-
'Category iOU': ['cat iou', 'iou cat.'],
48-
'Class iiOU': ['class iiou', 'iiou cla.'],
49-
'Category iiOU': ['cat iiou', 'iiou cat.'],
50-
}
51-
52-
# datasets[taxonomy name] is a list of normalized evidences for taxonomy name
53-
datasets = {
54-
'Hub5\'00 Average': ['avg', 'full', 'hub5', 'sum', 'evaluation'],
55-
'Hub5\'00 Switchboard': ['swbd', 'swb', 'hub5 swb', 'hub5 swbd', 'switchboard'],
56-
'Hub5\'00 CallHome': ['ch', 'hub5 ch', 'call home', 'chm'],
57-
'TIMIT': ['timit'],
58-
'WSJ eval92': ['wsj eval 92', 'eval 92', 'wsj'],
59-
'WSJ eval93': ['wsj eval 93', 'eval 93', 'wsj'],
60-
'LibriSpeech test-clean': ['libri speech test clean', 'libri speech', 'test', 'tst', 'clean', 'test clean'],
61-
'LibriSpeech test-other': ['libri speech test other', 'libri speech', 'test', 'tst', 'other', 'test other',
62-
'noisy'],
63-
'Babel Cebuano': ['babel cebuano', 'babel', 'cebuano', 'ceb'],
64-
'Babel Kazakh': ['babel kazakh', 'babel', 'kazakh', 'kaz'],
65-
'Babel Kurmanji': ['babel kurmanji', 'babel', 'kurmanji', 'kur'],
66-
'Babel Lithuanian': ['babel lithuanian', 'babel', 'lithuanian', 'lit'],
67-
'Babel Telugu': ['babel telugu', 'babel', 'telugu', 'tel'],
68-
'Babel Tok Pisin': ['babel tok pisin', 'babel', 'tok pisin', 'tok'],
69-
70-
'Ask Ubuntu': ['ask ubuntu', 'ask u', 'ubuntu'],
71-
'Chatbot': ['chatbot'],
72-
'Web Apps': ['web apps'],
73-
'CHiME clean': ['chime clean', 'chime', 'clean'],
74-
'CHiME real': ['chime real', 'chime', 'real'],
75-
'CHiME simu': ['chime simu', 'chime', 'simu', 'sim', 'simulated'],
76-
'CHiME-4 real 6ch': ['chime 4 real 6 ch', 'chime 4', 'real', '6 channel'],
77-
'AG News': ['ag news', 'ag'],
78-
'GigaWord': ['gigaword', 'giga'],
79-
'GEOTEXT': ['geotext', 'geo'],
80-
'IWSLT 2015 English-Vietnamese': ["iwslt 2015 english vietnamese", "iwslt", "2015", "english vietnamese", "en vi",
81-
"iwslt 15 english vietnamese", "iwslt 15 en vi", "english", "en", "vietnamese",
82-
"vi"],
83-
'IWSLT2011 English TED Talks': ["iwslt 2011 english ted talks", "iwslt", "2011", "english", "en", "eng", "ted",
84-
"ted talks", "english ted talks"],
85-
'IWSLT2012 English TED Talks': ["iwslt 2012 english ted talks", "iwslt", "2012", "english", "en", "eng", "ted",
86-
"ted talks", "english ted talks"],
87-
'IWSLT2014 English-German': ["iwslt 2014 english german", "iwslt", "2014", "english german", "en de", "en", "de",
88-
"english", "german"],
89-
'Rich Transcription 2002': ["rich transcription 2002", "rich transcription 02", "rt 2002", "2002", "rt 02", "rich",
90-
"transcription"],
91-
'Rich Transcription 2003': ["richt ranscription 2003", "rich transcription 03", "rt 2003", "2003", "rt 03", "rich",
92-
"transcription"],
93-
'Rich Transcription 2004': ["rich transcription 2004", "rich transcription 04", "rt 2004", "2004", "rt 04", "rich",
94-
"transcription"],
95-
'DIRHA English WSJ real': ['dirha english wsj real', 'dirha', 'english', 'en', 'eng', 'real', 'wsj'],
96-
'DIRHA English WSJ simu': ['dirha english wsj simu', 'dirha', 'english', 'en', 'eng', 'simu', 'wsj', 'simulated'],
97-
'VCTK clean': ["vctk clean", "vctk", "clean"],
98-
'VCTK noisy': ["vctk noisy", "vctk", "noisy"],
99-
'VoxForge American-Canadian': ["vox forge american canadian", "vox forge", "vox", "forge", "american canadian",
100-
"american", "canadian", "us ca"],
101-
'VoxForge Commonwealth': ["vox forge common wealth", "vox forge", "common wealth", "vox", "forge", "common",
102-
"wealth"],
103-
'VoxForge European': ["vox forge european", "vox forge", "european", "vox", "forge", "eu"],
104-
'VoxForge Indian': ["vox forge indian", "vox forge", "indian", "vox", "forge"],
105-
# RAIN REMOVAL
106-
'Raindrop': ['raindrop'],
107-
'Rain100H': ['rain100h'],
108-
'Rain100L': ['rain100l'],
109-
'Rain12': ['rain12'],
110-
'Rain800': ['rain800'],
111-
'Rain1400': ['rain1400'],
112-
'Real Rain': ['real rain'],
113-
'Rain in Surveillance': ['ris'],
114-
'Rain in Driving': ['rid'],
115-
'DID-MDN': ['did-mdn'],
116-
'SOTS': ['sots'],
117-
'Test 1': ['test 1'],
118-
'RainSynLight25': ['rainsynlight25'],
119-
'RainSynComplex25': ['rainsyncomplex25'],
120-
'NTURain': ['nturain'],
121-
'RainSynAll100': ['rainsynall100'],
122-
'SPA-DATA': ['spa-data'],
123-
'LasVR': ['lasvar'],
124-
# SEMANTIC SEGMENTATION
125-
'PASCAL VOC 2012': ['voc 2012', 'pascal voc 2012'],
126-
'ADE20K': ['ade20k'],
127-
'ImageNet': ['imagenet'],
128-
'Cityscapes': ['cityscapes'],
129-
'PASCAL-Context': ['pascal-context'],
130-
'PASCAL-Person-Part': ['pascal-person-part'],
131-
'ParseNet': ['parsenet'],
132-
'LIP': ['lip'],
133-
}
16+
from sota_extractor2.models.linking.manual_dicts import metrics, datasets, tasks
13417

13518
datasets = {k:(v+['test']) for k,v in datasets.items()}
13619
datasets.update({
13720
'LibriSpeech dev-clean': ['libri speech dev clean', 'libri speech', 'dev', 'clean', 'dev clean', 'development'],
13821
'LibriSpeech dev-other': ['libri speech dev other', 'libri speech', 'dev', 'other', 'dev other', 'development', 'noisy'],
13922
})
14023

141-
tasks = {
142-
'Speech Recognition': ['speech recognition']
143-
}
144-
14524
# escaped_ws_re = re.compile(r'\\\s+')
14625
# def name_to_re(name):
14726
# return re.compile(r'(?:^|\s+)' + escaped_ws_re.sub(r'\\s*', re.escape(name.strip())) + r'(?:$|\s+)', re.I)
sota_extractor2/models/linking/manual_dicts.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Manually curated lookup tables, imported by
# sota_extractor2.models.linking.context_search.
#
# Each dictionary maps a taxonomy name to a list of evidence strings for that
# name. The values must stay plain lists: the importing module extends them
# with list concatenation (v + ['test']).

# metrics[taxonomy name] is a list of evidences for the metric name
metrics = {
    'BLEU': ['bleu'],
    'BLEU score': ['bleu'],
    'Character Error Rate': ['cer', 'cers'],
    'Error': ['error'],
    'Exact Match Ratio': ['exact match'],
    'F1': ['f1', 'f1 score'],
    'F1 score': ['f1', 'f1 score'],
    'MAP': ['map'],
    'Percentage error': ['wer', 'per', 'wers', 'pers', 'word error rate', 'word error rates', 'phoneme error rates',
                         'phoneme error rate', 'error', 'error rate', 'error rates'],
    'Word Error Rate': ['wer', 'wers', 'word error rate', 'word error rates', 'error', 'error rate', 'error rates'],
    'Word Error Rate (WER)': ['wer', 'wers', 'word error rate', 'word error rates', 'error', 'error rate', 'error rates'],
    'ROUGE-1': ['r1'],
    'ROUGE-2': ['r2'],
    'ROUGE-F': ['rf'],
    'Precision': ['precision'],
    'Recall': ['recall'],
    # RAIN REMOVAL
    'PSNR': ['psnr', 'psnr (db)', 'mean psnr'],
    'SSIM': ['ssim'],
    'UQI': ['uqi'],
    'VIF': ['vif'],
    'SSEQ': ['sseq'],
    'NIQE': ['niqe'],
    'BLINDS-II': ['blinds-ii'],
    'FSIM': ['fsim'],
    # SEMANTIC SEGMENTATION
    'Mean iOU': ['miou', 'mean iou', 'mean iu'],
    'Pixel Accuracy': ['pixel accuracy', 'pixel acc', 'pixel acc.'],
    'Class iOU': ['class iou', 'iou cla.'],
    'Category iOU': ['cat iou', 'iou cat.'],
    'Class iiOU': ['class iiou', 'iiou cla.'],
    'Category iiOU': ['cat iiou', 'iiou cat.'],
}

# datasets[taxonomy name] is a list of normalized evidences for taxonomy name
datasets = {
    'Hub5\'00 Average': ['avg', 'full', 'hub5', 'sum', 'evaluation'],
    'Hub5\'00 Switchboard': ['swbd', 'swb', 'hub5 swb', 'hub5 swbd', 'switchboard'],
    'Hub5\'00 CallHome': ['ch', 'hub5 ch', 'call home', 'chm'],
    'TIMIT': ['timit'],
    'WSJ eval92': ['wsj eval 92', 'eval 92', 'wsj'],
    'WSJ eval93': ['wsj eval 93', 'eval 93', 'wsj'],
    'LibriSpeech test-clean': ['libri speech test clean', 'libri speech', 'test', 'tst', 'clean', 'test clean'],
    'LibriSpeech test-other': ['libri speech test other', 'libri speech', 'test', 'tst', 'other', 'test other',
                               'noisy'],
    'Babel Cebuano': ['babel cebuano', 'babel', 'cebuano', 'ceb'],
    'Babel Kazakh': ['babel kazakh', 'babel', 'kazakh', 'kaz'],
    'Babel Kurmanji': ['babel kurmanji', 'babel', 'kurmanji', 'kur'],
    'Babel Lithuanian': ['babel lithuanian', 'babel', 'lithuanian', 'lit'],
    'Babel Telugu': ['babel telugu', 'babel', 'telugu', 'tel'],
    'Babel Tok Pisin': ['babel tok pisin', 'babel', 'tok pisin', 'tok'],

    'Ask Ubuntu': ['ask ubuntu', 'ask u', 'ubuntu'],
    'Chatbot': ['chatbot'],
    'Web Apps': ['web apps'],
    'CHiME clean': ['chime clean', 'chime', 'clean'],
    'CHiME real': ['chime real', 'chime', 'real'],
    'CHiME simu': ['chime simu', 'chime', 'simu', 'sim', 'simulated'],
    'CHiME-4 real 6ch': ['chime 4 real 6 ch', 'chime 4', 'real', '6 channel'],
    'AG News': ['ag news', 'ag'],
    'GigaWord': ['gigaword', 'giga'],
    'GEOTEXT': ['geotext', 'geo'],
    'IWSLT 2015 English-Vietnamese': ["iwslt 2015 english vietnamese", "iwslt", "2015", "english vietnamese", "en vi",
                                      "iwslt 15 english vietnamese", "iwslt 15 en vi", "english", "en", "vietnamese",
                                      "vi"],
    'IWSLT2011 English TED Talks': ["iwslt 2011 english ted talks", "iwslt", "2011", "english", "en", "eng", "ted",
                                    "ted talks", "english ted talks"],
    'IWSLT2012 English TED Talks': ["iwslt 2012 english ted talks", "iwslt", "2012", "english", "en", "eng", "ted",
                                    "ted talks", "english ted talks"],
    'IWSLT2014 English-German': ["iwslt 2014 english german", "iwslt", "2014", "english german", "en de", "en", "de",
                                 "english", "german"],
    'Rich Transcription 2002': ["rich transcription 2002", "rich transcription 02", "rt 2002", "2002", "rt 02", "rich",
                                "transcription"],
    # The first evidence previously read "richt ranscription 2003" (misplaced
    # space), so the full dataset name could never match; it now follows the
    # same pattern as the 2002 and 2004 entries.
    'Rich Transcription 2003': ["rich transcription 2003", "rich transcription 03", "rt 2003", "2003", "rt 03", "rich",
                                "transcription"],
    'Rich Transcription 2004': ["rich transcription 2004", "rich transcription 04", "rt 2004", "2004", "rt 04", "rich",
                                "transcription"],
    'DIRHA English WSJ real': ['dirha english wsj real', 'dirha', 'english', 'en', 'eng', 'real', 'wsj'],
    'DIRHA English WSJ simu': ['dirha english wsj simu', 'dirha', 'english', 'en', 'eng', 'simu', 'wsj', 'simulated'],
    'VCTK clean': ["vctk clean", "vctk", "clean"],
    'VCTK noisy': ["vctk noisy", "vctk", "noisy"],
    'VoxForge American-Canadian': ["vox forge american canadian", "vox forge", "vox", "forge", "american canadian",
                                   "american", "canadian", "us ca"],
    'VoxForge Commonwealth': ["vox forge common wealth", "vox forge", "common wealth", "vox", "forge", "common",
                              "wealth"],
    'VoxForge European': ["vox forge european", "vox forge", "european", "vox", "forge", "eu"],
    'VoxForge Indian': ["vox forge indian", "vox forge", "indian", "vox", "forge"],
    # RAIN REMOVAL
    'Raindrop': ['raindrop'],
    'Rain100H': ['rain100h'],
    'Rain100L': ['rain100l'],
    'Rain12': ['rain12'],
    'Rain800': ['rain800'],
    'Rain1400': ['rain1400'],
    'Real Rain': ['real rain'],
    'Rain in Surveillance': ['ris'],
    'Rain in Driving': ['rid'],
    'DID-MDN': ['did-mdn'],
    'SOTS': ['sots'],
    'Test 1': ['test 1'],
    'RainSynLight25': ['rainsynlight25'],
    'RainSynComplex25': ['rainsyncomplex25'],
    'NTURain': ['nturain'],
    'RainSynAll100': ['rainsynall100'],
    'SPA-DATA': ['spa-data'],
    # NOTE(review): 'lasvar' does not match the key's spelling ('lasvr');
    # left unchanged here -- confirm which evidence string is intended.
    'LasVR': ['lasvar'],
    # SEMANTIC SEGMENTATION
    'PASCAL VOC 2012': ['voc 2012', 'pascal voc 2012'],
    'ADE20K': ['ade20k'],
    'ImageNet': ['imagenet'],
    'Cityscapes': ['cityscapes'],
    'PASCAL-Context': ['pascal-context'],
    'PASCAL-Person-Part': ['pascal-person-part'],
    'ParseNet': ['parsenet'],
    'LIP': ['lip'],
}

# tasks[taxonomy name] is a list of evidences for the task name
tasks = {
    'Speech Recognition': ['speech recognition']
}

0 commit comments

Comments
 (0)