Skip to content

Commit 04f0894

Browse files
Merge pull request #102 from MycroftAI/feature/script-overhaul
Feature/script overhaul
2 parents fb452ca + 6196bdc commit 04f0894

30 files changed

+1226
-866
lines changed

precise/functions.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,6 @@ def asigmoid(x):
8282

8383
def pdf(x, mu, std):
    """Probability density function (normal distribution)

    Args:
        x: Point(s) at which to evaluate the density; a scalar or a numpy array
        mu: Mean of the distribution
        std: Standard deviation of the distribution

    Returns:
        The density evaluated at ``x``, with the same shape as ``x``. When
        ``std`` is zero the density is not finite, so zeros are returned
        instead of dividing by zero.
    """
    if std == 0:
        # Keep the degenerate result shaped like x so both branches return the
        # same kind of value (float scalar for scalar x, float array otherwise)
        return np.zeros(np.shape(x)) if np.ndim(x) else 0.0
    return (1.0 / (std * sqrt(2 * pi))) * np.exp(-(x - mu) ** 2 / (2 * std ** 2))

precise/pocketsphinx/scripts/listen.py

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,56 +12,56 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
from prettyparse import create_parser
16-
from random import randint
15+
from precise_runner import PreciseRunner
16+
from precise_runner.runner import ListenerEngine
17+
from prettyparse import Usage
1718
from threading import Event
1819

1920
from precise.pocketsphinx.listener import PocketsphinxListener
21+
from precise.scripts.base_script import BaseScript
2022
from precise.util import activate_notify
21-
from precise_runner import PreciseRunner
22-
from precise_runner.runner import ListenerEngine
2323

24-
usage = '''
25-
Run Pocketsphinx on microphone audio input
26-
27-
:key_phrase str
28-
Key phrase composed of words from dictionary
29-
30-
:dict_file str
31-
Filename of dictionary with word pronunciations
32-
33-
:hmm_folder str
34-
Folder containing hidden markov model
35-
36-
:-th --threshold str 1e-90
37-
Threshold for activations
38-
39-
:-c --chunk-size int 2048
40-
Samples between inferences
41-
'''
4224

43-
session_id, chunk_num = '%09d' % randint(0, 999999999), 0
25+
class PocketsphinxListenScript(BaseScript):
26+
usage = Usage('''
27+
Run Pocketsphinx on microphone audio input
28+
29+
:key_phrase str
30+
Key phrase composed of words from dictionary
31+
32+
:dict_file str
33+
Filename of dictionary with word pronunciations
34+
35+
:hmm_folder str
36+
Folder containing hidden markov model
37+
38+
:-th --threshold str 1e-90
39+
Threshold for activations
4440
41+
:-c --chunk-size int 2048
42+
Samples between inferences
43+
''')
4544

46-
def main():
47-
args = create_parser(usage).parse_args()
45+
def run(self):
46+
def on_activation():
47+
activate_notify()
4848

49-
def on_activation():
50-
activate_notify()
49+
def on_prediction(conf):
50+
print('!' if conf > 0.5 else '.', end='', flush=True)
5151

52-
def on_prediction(conf):
53-
print('!' if conf > 0.5 else '.', end='', flush=True)
52+
args = self.args
53+
runner = PreciseRunner(
54+
ListenerEngine(
55+
PocketsphinxListener(
56+
args.key_phrase, args.dict_file, args.hmm_folder, args.threshold, args.chunk_size
57+
)
58+
), 3, on_activation=on_activation, on_prediction=on_prediction
59+
)
60+
runner.start()
61+
Event().wait() # Wait forever
5462

55-
runner = PreciseRunner(
56-
ListenerEngine(
57-
PocketsphinxListener(
58-
args.key_phrase, args.dict_file, args.hmm_folder, args.threshold, args.chunk_size
59-
)
60-
), 3, on_activation=on_activation, on_prediction=on_prediction
61-
)
62-
runner.start()
63-
Event().wait() # Wait forever
6463

64+
main = PocketsphinxListenScript.run_main
6565

6666
if __name__ == '__main__':
6767
main()

precise/pocketsphinx/scripts/test.py

Lines changed: 80 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -13,80 +13,101 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
import wave
16-
from prettyparse import create_parser
16+
from prettyparse import Usage
1717
from subprocess import check_output, PIPE
1818

1919
from precise.pocketsphinx.listener import PocketsphinxListener
20-
from precise.scripts.test import show_stats, Stats
20+
from precise.scripts.base_script import BaseScript
21+
from precise.scripts.test import Stats
2122
from precise.train_data import TrainData
2223

23-
usage = '''
24-
Test a dataset using Pocketsphinx
25-
26-
:key_phrase str
27-
Key phrase composed of words from dictionary
28-
29-
:dict_file str
30-
Filename of dictionary with word pronunciations
31-
32-
:hmm_folder str
33-
Folder containing hidden markov model
34-
35-
:-th --threshold str 1e-90
36-
Threshold for activations
37-
38-
:-t --use-train
39-
Evaluate training data instead of test data
40-
41-
:-nf --no-filenames
42-
Don't show the names of files that failed
43-
44-
...
45-
'''
46-
47-
48-
def eval_file(filename) -> float:
49-
transcription = check_output(
50-
['pocketsphinx_continuous', '-kws_threshold', '1e-20', '-keyphrase', 'hey my craft',
51-
'-infile', filename], stderr=PIPE)
52-
return float(bool(transcription) and not transcription.isspace())
53-
54-
55-
def test_pocketsphinx(listener: PocketsphinxListener, data_files) -> Stats:
56-
def run_test(filenames, name):
24+
25+
class PocketsphinxTestScript(BaseScript):
26+
usage = Usage('''
27+
Test a dataset using Pocketsphinx
28+
29+
:key_phrase str
30+
Key phrase composed of words from dictionary
31+
32+
:dict_file str
33+
Filename of dictionary with word pronunciations
34+
35+
:hmm_folder str
36+
Folder containing hidden markov model
37+
38+
:-th --threshold str 1e-90
39+
Threshold for activations
40+
41+
:-t --use-train
42+
Evaluate training data instead of test data
43+
44+
:-nf --no-filenames
45+
Don't show the names of files that failed
46+
47+
...
48+
''') | TrainData.usage
49+
50+
def __init__(self, args):
51+
super().__init__(args)
52+
self.listener = PocketsphinxListener(
53+
args.key_phrase, args.dict_file, args.hmm_folder, args.threshold
54+
)
55+
56+
self.outputs = []
57+
self.targets = []
58+
self.filenames = []
59+
60+
def get_stats(self):
61+
return Stats(self.outputs, self.targets, self.filenames)
62+
63+
def run(self):
64+
args = self.args
65+
data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
66+
print('Data:', data)
67+
68+
ww_files, nww_files = data.train_files if args.use_train else data.test_files
69+
self.run_test(ww_files, 'Wake Word', 1.0)
70+
self.run_test(nww_files, 'Not Wake Word', 0.0)
71+
stats = self.get_stats()
72+
if not self.args.no_filenames:
73+
fp_files = stats.calc_filenames(False, True, 0.5)
74+
fn_files = stats.calc_filenames(False, False, 0.5)
75+
print('=== False Positives ===')
76+
print('\n'.join(fp_files))
77+
print()
78+
print('=== False Negatives ===')
79+
print('\n'.join(fn_files))
80+
print()
81+
print(stats.counts_str(0.5))
82+
print()
83+
print(stats.summary_str(0.5))
84+
85+
def eval_file(self, filename) -> float:
86+
transcription = check_output(
87+
['pocketsphinx_continuous', '-kws_threshold', '1e-20', '-keyphrase', 'hey my craft',
88+
'-infile', filename], stderr=PIPE)
89+
return float(bool(transcription) and not transcription.isspace())
90+
91+
def run_test(self, test_files, label_name, label):
5792
print()
58-
print('===', name, '===')
59-
negatives, positives = [], []
60-
for filename in filenames:
93+
print('===', label_name, '===')
94+
for test_file in test_files:
6195
try:
62-
with wave.open(filename) as wf:
96+
with wave.open(test_file) as wf:
6397
frames = wf.readframes(wf.getnframes())
6498
except (OSError, EOFError):
6599
print('?', end='', flush=True)
66100
continue
67-
out = listener.found_wake_word(frames)
68-
{False: negatives, True: positives}[out].append(filename)
101+
102+
out = int(self.listener.found_wake_word(frames))
103+
self.outputs.append(out)
104+
self.targets.append(label)
105+
self.filenames.append(test_file)
69106
print('!' if out else '.', end='', flush=True)
70107
print()
71-
return negatives, positives
72-
73-
false_neg, true_pos = run_test(data_files[0], 'Wake Word')
74-
true_neg, false_pos = run_test(data_files[1], 'Not Wake Word')
75-
return Stats(false_pos, false_neg, true_pos, true_neg)
76-
77-
78-
def main():
79-
args = TrainData.parse_args(create_parser(usage))
80-
data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
81-
data_files = data.train_files if args.use_train else data.test_files
82-
listener = PocketsphinxListener(
83-
args.key_phrase, args.dict_file, args.hmm_folder, args.threshold
84-
)
85108

86-
print('Data:', data)
87-
stats = test_pocketsphinx(listener, data_files)
88-
show_stats(stats, not args.no_filenames)
89109

110+
main = PocketsphinxTestScript.run_main
90111

91112
if __name__ == '__main__':
92113
main()

0 commit comments

Comments
 (0)