|
13 | 13 |
|
14 | 14 | from sota_extractor2.pipeline_logger import pipeline_logger
|
15 | 15 |
|
16 |
| -metrics = { |
17 |
| - 'BLEU': ['bleu'], |
18 |
| - 'BLEU score': ['bleu'], |
19 |
| - 'Character Error Rate': ['cer', 'cers'], |
20 |
| - 'Error': ['error'], |
21 |
| - 'Exact Match Ratio': ['exact match'], |
22 |
| - 'F1': ['f1', 'f1 score'], |
23 |
| - 'F1 score': ['f1', 'f1 score'], |
24 |
| - 'MAP': ['map'], |
25 |
| - 'Percentage error': ['wer', 'per', 'wers', 'pers', 'word error rate', 'word error rates', 'phoneme error rates', |
26 |
| - 'phoneme error rate', 'error', 'error rate', 'error rates'], |
27 |
| - 'Word Error Rate': ['wer', 'wers', 'word error rate', 'word error rates', 'error', 'error rate', 'error rates'], |
28 |
| - 'Word Error Rate (WER)': ['wer', 'wers', 'word error rate', 'word error rates', 'error', 'error rate', 'error rates'], |
29 |
| - 'ROUGE-1': ['r1'], |
30 |
| - 'ROUGE-2': ['r2'], |
31 |
| - 'ROUGE-F': ['rf'], |
32 |
| - 'Precision': ['precision'], |
33 |
| - 'Recall': ['recall'], |
34 |
| - # RAIN REMOVAL |
35 |
| - 'PSNR': ['psnr', 'psnr (db)', 'mean psnr'], |
36 |
| - 'SSIM': ['ssim'], |
37 |
| - 'UQI': ['uqi'], |
38 |
| - 'VIF': ['vif'], |
39 |
| - 'SSEQ': ['sseq'], |
40 |
| - 'NIQE': ['niqe'], |
41 |
| - 'BLINDS-II': ['blinds-ii'], |
42 |
| - 'FSIM': ['fsim'], |
43 |
| - # SEMANTIC SEGMENTATION |
44 |
| - 'Mean iOU': ['miou', 'mean iou', 'mean iu'], |
45 |
| - 'Pixel Accuracy': ['pixel accuracy', 'pixel acc', 'pixel acc.'], |
46 |
| - 'Class iOU': ['class iou', 'iou cla.'], |
47 |
| - 'Category iOU': ['cat iou', 'iou cat.'], |
48 |
| - 'Class iiOU': ['class iiou', 'iiou cla.'], |
49 |
| - 'Category iiOU': ['cat iiou', 'iiou cat.'], |
50 |
| -} |
51 |
| - |
52 |
| -# datasets[taxonomy name] is a list of normalized evidences for taxonomy name |
53 |
| -datasets = { |
54 |
| - 'Hub5\'00 Average': ['avg', 'full', 'hub5', 'sum', 'evaluation'], |
55 |
| - 'Hub5\'00 Switchboard': ['swbd', 'swb', 'hub5 swb', 'hub5 swbd', 'switchboard'], |
56 |
| - 'Hub5\'00 CallHome': ['ch', 'hub5 ch', 'call home', 'chm'], |
57 |
| - 'TIMIT': ['timit'], |
58 |
| - 'WSJ eval92': ['wsj eval 92', 'eval 92', 'wsj'], |
59 |
| - 'WSJ eval93': ['wsj eval 93', 'eval 93', 'wsj'], |
60 |
| - 'LibriSpeech test-clean': ['libri speech test clean', 'libri speech', 'test', 'tst', 'clean', 'test clean'], |
61 |
| - 'LibriSpeech test-other': ['libri speech test other', 'libri speech', 'test', 'tst', 'other', 'test other', |
62 |
| - 'noisy'], |
63 |
| - 'Babel Cebuano': ['babel cebuano', 'babel', 'cebuano', 'ceb'], |
64 |
| - 'Babel Kazakh': ['babel kazakh', 'babel', 'kazakh', 'kaz'], |
65 |
| - 'Babel Kurmanji': ['babel kurmanji', 'babel', 'kurmanji', 'kur'], |
66 |
| - 'Babel Lithuanian': ['babel lithuanian', 'babel', 'lithuanian', 'lit'], |
67 |
| - 'Babel Telugu': ['babel telugu', 'babel', 'telugu', 'tel'], |
68 |
| - 'Babel Tok Pisin': ['babel tok pisin', 'babel', 'tok pisin', 'tok'], |
69 |
| - |
70 |
| - 'Ask Ubuntu': ['ask ubuntu', 'ask u', 'ubuntu'], |
71 |
| - 'Chatbot': ['chatbot'], |
72 |
| - 'Web Apps': ['web apps'], |
73 |
| - 'CHiME clean': ['chime clean', 'chime', 'clean'], |
74 |
| - 'CHiME real': ['chime real', 'chime', 'real'], |
75 |
| - 'CHiME simu': ['chime simu', 'chime', 'simu', 'sim', 'simulated'], |
76 |
| - 'CHiME-4 real 6ch': ['chime 4 real 6 ch', 'chime 4', 'real', '6 channel'], |
77 |
| - 'AG News': ['ag news', 'ag'], |
78 |
| - 'GigaWord': ['gigaword', 'giga'], |
79 |
| - 'GEOTEXT': ['geotext', 'geo'], |
80 |
| - 'IWSLT 2015 English-Vietnamese': ["iwslt 2015 english vietnamese", "iwslt", "2015", "english vietnamese", "en vi", |
81 |
| - "iwslt 15 english vietnamese", "iwslt 15 en vi", "english", "en", "vietnamese", |
82 |
| - "vi"], |
83 |
| - 'IWSLT2011 English TED Talks': ["iwslt 2011 english ted talks", "iwslt", "2011", "english", "en", "eng", "ted", |
84 |
| - "ted talks", "english ted talks"], |
85 |
| - 'IWSLT2012 English TED Talks': ["iwslt 2012 english ted talks", "iwslt", "2012", "english", "en", "eng", "ted", |
86 |
| - "ted talks", "english ted talks"], |
87 |
| - 'IWSLT2014 English-German': ["iwslt 2014 english german", "iwslt", "2014", "english german", "en de", "en", "de", |
88 |
| - "english", "german"], |
89 |
| - 'Rich Transcription 2002': ["rich transcription 2002", "rich transcription 02", "rt 2002", "2002", "rt 02", "rich", |
90 |
| - "transcription"], |
91 |
| - 'Rich Transcription 2003': ["richt ranscription 2003", "rich transcription 03", "rt 2003", "2003", "rt 03", "rich", |
92 |
| - "transcription"], |
93 |
| - 'Rich Transcription 2004': ["rich transcription 2004", "rich transcription 04", "rt 2004", "2004", "rt 04", "rich", |
94 |
| - "transcription"], |
95 |
| - 'DIRHA English WSJ real': ['dirha english wsj real', 'dirha', 'english', 'en', 'eng', 'real', 'wsj'], |
96 |
| - 'DIRHA English WSJ simu': ['dirha english wsj simu', 'dirha', 'english', 'en', 'eng', 'simu', 'wsj', 'simulated'], |
97 |
| - 'VCTK clean': ["vctk clean", "vctk", "clean"], |
98 |
| - 'VCTK noisy': ["vctk noisy", "vctk", "noisy"], |
99 |
| - 'VoxForge American-Canadian': ["vox forge american canadian", "vox forge", "vox", "forge", "american canadian", |
100 |
| - "american", "canadian", "us ca"], |
101 |
| - 'VoxForge Commonwealth': ["vox forge common wealth", "vox forge", "common wealth", "vox", "forge", "common", |
102 |
| - "wealth"], |
103 |
| - 'VoxForge European': ["vox forge european", "vox forge", "european", "vox", "forge", "eu"], |
104 |
| - 'VoxForge Indian': ["vox forge indian", "vox forge", "indian", "vox", "forge"], |
105 |
| - # RAIN REMOVAL |
106 |
| - 'Raindrop': ['raindrop'], |
107 |
| - 'Rain100H': ['rain100h'], |
108 |
| - 'Rain100L': ['rain100l'], |
109 |
| - 'Rain12': ['rain12'], |
110 |
| - 'Rain800': ['rain800'], |
111 |
| - 'Rain1400': ['rain1400'], |
112 |
| - 'Real Rain': ['real rain'], |
113 |
| - 'Rain in Surveillance': ['ris'], |
114 |
| - 'Rain in Driving': ['rid'], |
115 |
| - 'DID-MDN': ['did-mdn'], |
116 |
| - 'SOTS': ['sots'], |
117 |
| - 'Test 1': ['test 1'], |
118 |
| - 'RainSynLight25': ['rainsynlight25'], |
119 |
| - 'RainSynComplex25': ['rainsyncomplex25'], |
120 |
| - 'NTURain': ['nturain'], |
121 |
| - 'RainSynAll100': ['rainsynall100'], |
122 |
| - 'SPA-DATA': ['spa-data'], |
123 |
| - 'LasVR': ['lasvar'], |
124 |
| - # SEMANTIC SEGMENTATION |
125 |
| - 'PASCAL VOC 2012': ['voc 2012', 'pascal voc 2012'], |
126 |
| - 'ADE20K': ['ade20k'], |
127 |
| - 'ImageNet': ['imagenet'], |
128 |
| - 'Cityscapes': ['cityscapes'], |
129 |
| - 'PASCAL-Context': ['pascal-context'], |
130 |
| - 'PASCAL-Person-Part': ['pascal-person-part'], |
131 |
| - 'ParseNet': ['parsenet'], |
132 |
| - 'LIP': ['lip'], |
133 |
| -} |
| 16 | +from sota_extractor2.models.linking.manual_dicts import metrics, datasets, tasks |
134 | 17 |
|
135 | 18 | datasets = {k:(v+['test']) for k,v in datasets.items()}
|
136 | 19 | datasets.update({
|
137 | 20 | 'LibriSpeech dev-clean': ['libri speech dev clean', 'libri speech', 'dev', 'clean', 'dev clean', 'development'],
|
138 | 21 | 'LibriSpeech dev-other': ['libri speech dev other', 'libri speech', 'dev', 'other', 'dev other', 'development', 'noisy'],
|
139 | 22 | })
|
140 | 23 |
|
141 |
| -tasks = { |
142 |
| - 'Speech Recognition': ['speech recognition'] |
143 |
| -} |
144 |
| - |
145 | 24 | # escaped_ws_re = re.compile(r'\\\s+')
|
146 | 25 | # def name_to_re(name):
|
147 | 26 | # return re.compile(r'(?:^|\s+)' + escaped_ws_re.sub(r'\\s*', re.escape(name.strip())) + r'(?:$|\s+)', re.I)
|
|
0 commit comments