|
13 | 13 | # See the License for the specific language governing permissions and |
14 | 14 | # limitations under the License. |
15 | 15 | import wave |
16 | | -from prettyparse import create_parser |
| 16 | +from prettyparse import Usage |
17 | 17 | from subprocess import check_output, PIPE |
18 | 18 |
|
19 | 19 | from precise.pocketsphinx.listener import PocketsphinxListener |
20 | | -from precise.scripts.test import show_stats, Stats |
| 20 | +from precise.scripts.base_script import BaseScript |
| 21 | +from precise.scripts.test import Stats |
21 | 22 | from precise.train_data import TrainData |
22 | 23 |
|
23 | | -usage = ''' |
24 | | - Test a dataset using Pocketsphinx |
25 | | - |
26 | | - :key_phrase str |
27 | | - Key phrase composed of words from dictionary |
28 | | - |
29 | | - :dict_file str |
30 | | - Filename of dictionary with word pronunciations |
31 | | - |
32 | | - :hmm_folder str |
33 | | - Folder containing hidden markov model |
34 | | - |
35 | | - :-th --threshold str 1e-90 |
36 | | - Threshold for activations |
37 | | - |
38 | | - :-t --use-train |
39 | | - Evaluate training data instead of test data |
40 | | - |
41 | | - :-nf --no-filenames |
42 | | - Don't show the names of files that failed |
43 | | - |
44 | | - ... |
45 | | -''' |
46 | | - |
47 | | - |
48 | | -def eval_file(filename) -> float: |
49 | | - transcription = check_output( |
50 | | - ['pocketsphinx_continuous', '-kws_threshold', '1e-20', '-keyphrase', 'hey my craft', |
51 | | - '-infile', filename], stderr=PIPE) |
52 | | - return float(bool(transcription) and not transcription.isspace()) |
53 | | - |
54 | | - |
55 | | -def test_pocketsphinx(listener: PocketsphinxListener, data_files) -> Stats: |
56 | | - def run_test(filenames, name): |
| 24 | + |
class PocketsphinxTestScript(BaseScript):
    """Evaluate a Pocketsphinx keyword listener against a tagged dataset.

    Every audio file of the selected split (test by default, train with
    ``--use-train``) is fed to a ``PocketsphinxListener``. The binary
    outputs, the expected labels and the filenames are accumulated on the
    instance and handed to ``Stats`` for reporting.
    """

    usage = Usage('''
        Test a dataset using Pocketsphinx

        :key_phrase str
            Key phrase composed of words from dictionary

        :dict_file str
            Filename of dictionary with word pronunciations

        :hmm_folder str
            Folder containing hidden markov model

        :-th --threshold str 1e-90
            Threshold for activations

        :-t --use-train
            Evaluate training data instead of test data

        :-nf --no-filenames
            Don't show the names of files that failed

        ...
    ''') | TrainData.usage

    def __init__(self, args):
        """Build the listener from the parsed arguments and reset results.

        Args:
            args: Parsed command-line arguments; ``key_phrase``,
                ``dict_file``, ``hmm_folder`` and ``threshold`` are read here.
        """
        super().__init__(args)
        self.listener = PocketsphinxListener(
            args.key_phrase, args.dict_file, args.hmm_folder, args.threshold
        )

        # Parallel lists: entry i of each describes the i-th evaluated file.
        self.outputs = []
        self.targets = []
        self.filenames = []

    def get_stats(self):
        """Return a ``Stats`` object built from the results gathered so far."""
        return Stats(self.outputs, self.targets, self.filenames)

    def run(self):
        """Load the dataset, evaluate both classes and print the report."""
        args = self.args
        data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
        print('Data:', data)

        ww_files, nww_files = data.train_files if args.use_train else data.test_files
        self.run_test(ww_files, 'Wake Word', 1.0)
        self.run_test(nww_files, 'Not Wake Word', 0.0)
        stats = self.get_stats()
        if not args.no_filenames:
            # NOTE(review): the positional flags select the false-positive and
            # false-negative file lists per the headings printed below; confirm
            # the argument order against Stats.calc_filenames.
            fp_files = stats.calc_filenames(False, True, 0.5)
            fn_files = stats.calc_filenames(False, False, 0.5)
            print('=== False Positives ===')
            print('\n'.join(fp_files))
            print()
            print('=== False Negatives ===')
            print('\n'.join(fn_files))
            print()
        print(stats.counts_str(0.5))
        print()
        print(stats.summary_str(0.5))

    def eval_file(self, filename) -> float:
        """Run the ``pocketsphinx_continuous`` CLI on one file.

        Args:
            filename: Path passed to the tool's ``-infile`` option.

        Returns:
            1.0 if the tool wrote any non-whitespace output to stdout,
            otherwise 0.0.

        Raises:
            subprocess.CalledProcessError: If the tool exits with a
                non-zero status.
        """
        # NOTE(review): the key phrase and threshold are hard-coded here and
        # are not taken from self.args; nothing in this class calls this
        # method. Confirm whether it is still needed.
        transcription = check_output(
            ['pocketsphinx_continuous', '-kws_threshold', '1e-20', '-keyphrase', 'hey my craft',
             '-infile', filename], stderr=PIPE)
        return float(bool(transcription) and not transcription.isspace())

    def run_test(self, test_files, label_name, label):
        """Run the listener over ``test_files`` and record each result.

        Prints one progress character per file: ``!`` for an activation,
        ``.`` for no activation and ``?`` for a file that could not be read
        (such files are skipped and not recorded).

        Args:
            test_files: Iterable of WAV file paths to evaluate.
            label_name: Heading printed before the progress characters.
            label: Expected output stored in ``self.targets`` for each file.
        """
        print()
        print('===', label_name, '===')
        for test_file in test_files:
            try:
                with wave.open(test_file) as wf:
                    frames = wf.readframes(wf.getnframes())
            except (OSError, EOFError, wave.Error):
                # wave.Error covers files that exist but are not valid WAV;
                # without it a single malformed file would abort the run.
                print('?', end='', flush=True)
                continue

            out = int(self.listener.found_wake_word(frames))
            self.outputs.append(out)
            self.targets.append(label)
            self.filenames.append(test_file)
            print('!' if out else '.', end='', flush=True)
        print()
71 | | - return negatives, positives |
72 | | - |
73 | | - false_neg, true_pos = run_test(data_files[0], 'Wake Word') |
74 | | - true_neg, false_pos = run_test(data_files[1], 'Not Wake Word') |
75 | | - return Stats(false_pos, false_neg, true_pos, true_neg) |
76 | | - |
77 | | - |
78 | | -def main(): |
79 | | - args = TrainData.parse_args(create_parser(usage)) |
80 | | - data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder) |
81 | | - data_files = data.train_files if args.use_train else data.test_files |
82 | | - listener = PocketsphinxListener( |
83 | | - args.key_phrase, args.dict_file, args.hmm_folder, args.threshold |
84 | | - ) |
85 | 108 |
|
86 | | - print('Data:', data) |
87 | | - stats = test_pocketsphinx(listener, data_files) |
88 | | - show_stats(stats, not args.no_filenames) |
89 | 109 |
|
# Module-level entry point: expose the script class's ``run_main`` under the
# conventional name ``main`` so the module can be invoked as a command.
main = PocketsphinxTestScript.run_main


if __name__ == '__main__':
    # Executed directly rather than imported: run the script.
    main()
0 commit comments