Skip to content
This repository was archived by the owner on Jun 27, 2023. It is now read-only.

Commit c674fb1

Browse files
committed
Added LiveSpeech and AudioFile classes, updated the README, and made a few improvements
1 parent f7a6724 commit c674fb1

File tree

8 files changed: +409 additions, -193 deletions (lines changed)

README.rst

Lines changed: 276 additions & 80 deletions
Large diffs are not rendered by default.

example.py

Lines changed: 0 additions & 13 deletions
This file was deleted.

pocketsphinx/__init__.py

Lines changed: 98 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131
import os
3232
import sys
33+
import signal
34+
from contextlib import contextmanager
3335
from sphinxbase import *
3436
from .pocketsphinx import *
3537

@@ -43,21 +45,10 @@ def get_model_path():
4345

4446

4547
def get_data_path():
46-
""" Return path to the model. """
48+
""" Return path to the data. """
4749
return os.path.join(os.path.dirname(__file__), 'data')
4850

4951

50-
class Phrase(object):
51-
52-
def __init__(self, phrase, probability, score):
53-
self.phrase = phrase
54-
self.probability = probability
55-
self.score = score
56-
57-
def __str__(self):
58-
return self.phrase
59-
60-
6152
class Pocketsphinx(Decoder):
6253

6354
def __init__(self, **kwargs):
@@ -98,36 +89,45 @@ def __init__(self, **kwargs):
9889

9990
super(Pocketsphinx, self).__init__(config)
10091

101-
def decode(self, audio=None, max_samples=1024,
102-
no_search=False, full_utt=False, callback=None):
103-
keyphrase = self.get_config().get_string('-keyphrase')
92+
def __str__(self):
93+
return self.hypothesis()
94+
95+
@contextmanager
96+
def start_utterance(self):
10497
self.start_utt()
105-
with open(audio or self.goforward, 'rb') as f:
106-
while True:
107-
buf = f.read(max_samples)
108-
if buf:
109-
self.process_raw(buf, no_search, full_utt)
110-
else:
111-
break
112-
if keyphrase and self.hyp():
113-
self.end_utt()
114-
if callback:
115-
callback(self)
116-
self.start_utt()
98+
yield
11799
self.end_utt()
118100

119-
def phrase(self):
120-
hyp = self.hyp()
121-
if hyp:
122-
return Phrase(hyp.hypstr, hyp.prob, hyp.best_score)
101+
@contextmanager
102+
def end_utterance(self):
103+
self.end_utt()
104+
yield
105+
self.start_utt()
123106

124-
def segments(self):
125-
return [s.word for s in self.seg()]
107+
def decode(self, audio_file=None, buffer_size=2048,
108+
no_search=False, full_utt=False):
109+
buf = bytearray(buffer_size)
110+
with open(audio_file or self.goforward, 'rb') as f:
111+
with self.start_utterance():
112+
while f.readinto(buf):
113+
self.process_raw(buf, no_search, full_utt)
114+
return self
115+
116+
def segments(self, detailed=False):
117+
if detailed:
118+
return [
119+
(s.word, s.prob, s.start_frame, s.end_frame)
120+
for s in self.seg()
121+
]
122+
else:
123+
return [s.word for s in self.seg()]
126124

127125
def hypothesis(self):
128126
hyp = self.hyp()
129127
if hyp:
130128
return hyp.hypstr
129+
else:
130+
return ''
131131

132132
def probability(self):
133133
hyp = self.hyp()
@@ -151,35 +151,75 @@ def confidence(self):
151151
return self.get_logmath().exp(hyp.prob)
152152

153153

154-
class Continuous(Pocketsphinx):
154+
class AudioFile(Pocketsphinx):
155155

156156
def __init__(self, **kwargs):
157-
audio = kwargs.pop('audio', None)
158-
super(Continuous, self).__init__(**kwargs)
159-
self.stream = open(audio or self.goforward, 'rb')
157+
signal.signal(signal.SIGINT, self.stop)
158+
159+
self.audio_file = kwargs.pop('audio_file', None)
160+
self.buffer_size = kwargs.pop('buffer_size', 2048)
161+
self.no_search = kwargs.pop('no_search', False)
162+
self.full_utt = kwargs.pop('full_utt', False)
163+
164+
self.keyphrase = kwargs.get('keyphrase')
165+
160166
self.in_speech = False
161-
self.start_utt()
167+
self.buf = bytearray(self.buffer_size)
168+
169+
super(AudioFile, self).__init__(**kwargs)
170+
171+
self.f = open(self.audio_file or self.goforward, 'rb')
162172

163173
def __iter__(self):
164-
return self
174+
with self.f:
175+
with self.start_utterance():
176+
while self.f.readinto(self.buf):
177+
self.process_raw(self.buf, self.no_search, self.full_utt)
178+
if self.keyphrase and self.hyp():
179+
with self.end_utterance():
180+
yield self
181+
elif self.in_speech != self.get_in_speech():
182+
self.in_speech = self.get_in_speech()
183+
if not self.in_speech and self.hyp():
184+
with self.end_utterance():
185+
yield self
165186

166-
def __next__(self):
167-
while True:
168-
buf = self.stream.read(1024)
169-
if buf:
170-
self.process_raw(buf, False, False)
171-
if self.get_in_speech() != self.in_speech:
172-
self.in_speech = self.get_in_speech()
173-
if not self.in_speech:
174-
self.end_utt()
175-
phrase = self.phrase()
176-
if phrase:
177-
return phrase
178-
self.start_utt()
179-
continue
180-
else:
181-
self.stream.close()
182-
raise StopIteration
187+
def stop(self, *args, **kwargs):
188+
raise StopIteration
189+
190+
191+
class LiveSpeech(Pocketsphinx):
183192

184-
def next(self):
185-
return self.__next__()
193+
def __init__(self, **kwargs):
194+
signal.signal(signal.SIGINT, self.stop)
195+
196+
self.audio_device = kwargs.pop('audio_device', None)
197+
self.sampling_rate = kwargs.pop('sampling_rate', 16000)
198+
self.buffer_size = kwargs.pop('buffer_size', 2048)
199+
self.no_search = kwargs.pop('no_search', False)
200+
self.full_utt = kwargs.pop('full_utt', False)
201+
202+
self.keyphrase = kwargs.get('keyphrase')
203+
204+
self.in_speech = False
205+
self.buf = bytearray(self.buffer_size)
206+
self.ad = Ad(self.audio_device, self.sampling_rate)
207+
208+
super(LiveSpeech, self).__init__(**kwargs)
209+
210+
def __iter__(self):
211+
with self.ad:
212+
with self.start_utterance():
213+
while self.ad.readinto(self.buf) >= 0:
214+
self.process_raw(self.buf, self.no_search, self.full_utt)
215+
if self.keyphrase and self.hyp():
216+
with self.end_utterance():
217+
yield self
218+
elif self.in_speech != self.get_in_speech():
219+
self.in_speech = self.get_in_speech()
220+
if not self.in_speech and self.hyp():
221+
with self.end_utterance():
222+
yield self
223+
224+
def stop(self, *args, **kwargs):
225+
raise StopIteration

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@
168168
'Development Status :: 2 - Pre-Alpha',
169169
'Operating System :: Microsoft :: Windows',
170170
'Operating System :: POSIX :: Linux',
171+
'Operating System :: MacOS',
171172
'License :: OSI Approved :: BSD License',
172173
'Programming Language :: Python :: 2',
173174
'Programming Language :: Python :: 2.7',

swig/sphinxbase/ad.i

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -56,22 +56,20 @@ negative error code."
5656
%include pybuffer.i
5757
%include typemaps.i
5858

59-
%begin %{
60-
#include <Python.h>
61-
#include <sphinxbase/ad.h>
62-
63-
typedef ad_rec_t Ad;
59+
%{
60+
#include <sphinxbase/ad.h>
61+
typedef ad_rec_t Ad;
6462
%}
6563

6664
typedef struct {} Ad;
6765

6866
%extend Ad {
69-
Ad(const char *device=NULL, int32 rate=16000, int *errcode) {
67+
Ad(const char *audio_device=NULL, int sampling_rate=16000, int *errcode) {
7068
Ad *ad;
71-
if (device == NULL)
72-
ad = ad_open_sps(rate);
69+
if (audio_device == NULL)
70+
ad = ad_open_sps(sampling_rate);
7371
else
74-
ad = ad_open_dev(device, rate);
72+
ad = ad_open_dev(audio_device, sampling_rate);
7573
*errcode = ad ? 0 : -1;
7674
return ad;
7775
}
@@ -80,27 +78,27 @@ typedef struct {} Ad;
8078
ad_close($self);
8179
}
8280

83-
Ad * __enter__() {
84-
ad_start_rec($self);
81+
Ad *__enter__(int *errcode) {
82+
*errcode = ad_start_rec($self);
8583
return $self;
8684
}
8785

88-
void __exit__() {
89-
ad_stop_rec($self);
86+
void __exit__(PyObject *exception_type, PyObject *exception_value,
87+
PyObject *exception_traceback, int *errcode) {
88+
*errcode = ad_stop_rec($self);
9089
}
9190

92-
int start_rec(int *errcode) {
91+
int start_recording(int *errcode) {
9392
return *errcode = ad_start_rec($self);
9493
}
9594

96-
int stop_rec(int *errcode) {
95+
int stop_recording(int *errcode) {
9796
return *errcode = ad_stop_rec($self);
9897
}
9998

10099
%include <pybuffer.i>
101-
%pybuffer_mutable_binary(char *SDATA, size_t NSAMP);
102-
int read(char *SDATA, size_t NSAMP, int *errcode) {
103-
NSAMP /= sizeof(int16);
104-
return *errcode = ad_read($self, (int16 *)SDATA, NSAMP);
100+
%pybuffer_mutable_binary(char *DATA, size_t SIZE);
101+
int readinto(char *DATA, size_t SIZE, int *errcode) {
102+
return *errcode = ad_read($self, (int16*)DATA, SIZE /= sizeof(int16));
105103
}
106104
}
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@
2929
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3030
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131
from unittest import TestCase
32-
from pocketsphinx import Continuous
32+
from pocketsphinx import AudioFile
3333

3434

35-
class TestContinuous(TestCase):
35+
class TestAudioFile(TestCase):
3636

37-
def test_continuous(self):
38-
phrase = ''
39-
for c in Continuous():
40-
phrase = c.phrase
41-
self.assertEqual(phrase, 'go forward ten meters')
37+
def test_audiofile(self):
38+
hypothesis = ''
39+
for phrase in AudioFile():
40+
hypothesis = str(phrase)
41+
self.assertEqual(hypothesis, 'go forward ten meters')

tests/test_decoder.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,10 @@ class TestCepDecoder(TestCase):
7373
def test_cep_decoder_hypothesis(self):
7474
ps = Pocketsphinx()
7575
with open('deps/pocketsphinx/test/data/goforward.mfc', 'rb') as f:
76-
f.read(4)
77-
buf = f.read(13780)
78-
ps.start_utt()
79-
ps.process_cep(buf, False, True)
80-
ps.end_utt()
76+
with ps.start_utterance():
77+
f.read(4)
78+
buf = f.read(13780)
79+
ps.process_cep(buf, False, True)
8180
self.assertEqual(ps.hypothesis(), 'go forward ten meters')
8281
self.assertEqual(ps.score(), -7095)
8382
self.assertEqual(ps.probability(), -32715)

tests/test_kws.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,14 @@
2929
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3030
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131
from unittest import TestCase
32-
from pocketsphinx import Pocketsphinx
32+
from pocketsphinx import AudioFile
3333

3434

3535
class TestKws(TestCase):
3636

3737
def test_kws(self):
38-
def keyphrase(k):
39-
k.keyphrase = [
40-
(s.word, s.prob, s.start_frame, s.end_frame)
41-
for s in k.seg()
42-
]
43-
44-
ps = Pocketsphinx(lm=False, keyphrase='forward', kws_threshold=1e+20)
45-
ps.decode(callback=keyphrase)
46-
47-
self.assertEqual(ps.keyphrase, [('forward', -617, 63, 121)])
38+
segments = []
39+
for phrase in AudioFile(lm=False, keyphrase='forward',
40+
kws_threshold=1e+20):
41+
segments = phrase.segments(detailed=True)
42+
self.assertEqual(segments, [('forward', -617, 63, 121)])

0 commit comments

Comments
 (0)