Added LiveSpeech and AudioFile classes, updated README, and made a few improvements

bambocher · bambocher · commit c674fb11e24d · 2016-09-12T03:53:42.000+03:00
diff --git a/README.rst b/README.rst
diff --git a/example.py b/example.py
diff --git a/pocketsphinx/__init__.py b/pocketsphinx/__init__.py
@@ -30,6 +30,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import os
 import sys
+import signal
+from contextlib import contextmanager
 from sphinxbase import *
 from .pocketsphinx import *
 
@@ -43,21 +45,10 @@ def get_model_path():
 
 
 def get_data_path():
-    """ Return path to the model. """
+    """ Return path to the data. """
     return os.path.join(os.path.dirname(__file__), 'data')
 
 
-class Phrase(object):
-
-    def __init__(self, phrase, probability, score):
-        self.phrase = phrase
-        self.probability = probability
-        self.score = score
-
-    def __str__(self):
-        return self.phrase
-
-
 class Pocketsphinx(Decoder):
 
     def __init__(self, **kwargs):
@@ -98,36 +89,65 @@ def __init__(self, **kwargs):
 
         super(Pocketsphinx, self).__init__(config)
 
-    def decode(self, audio=None, max_samples=1024,
-               no_search=False, full_utt=False, callback=None):
-        keyphrase = self.get_config().get_string('-keyphrase')
+    def __str__(self):
+        """Return the current hypothesis text ('' when there is none)."""
+        return self.hypothesis()
+
+    @contextmanager
+    def start_utterance(self):
+        """Start an utterance; end it once the body finishes normally."""
         self.start_utt()
-        with open(audio or self.goforward, 'rb') as f:
-            while True:
-                buf = f.read(max_samples)
-                if buf:
-                    self.process_raw(buf, no_search, full_utt)
-                else:
-                    break
-                if keyphrase and self.hyp():
-                    self.end_utt()
-                    if callback:
-                        callback(self)
-                    self.start_utt()
+        yield
         self.end_utt()
 
-    def phrase(self):
-        hyp = self.hyp()
-        if hyp:
-            return Phrase(hyp.hypstr, hyp.prob, hyp.best_score)
+    @contextmanager
+    def end_utterance(self):
+        """End the utterance; start a new one once the body finishes."""
+        self.end_utt()
+        yield
+        self.start_utt()
 
-    def segments(self):
-        return [s.word for s in self.seg()]
+    def decode(self, audio_file=None, buffer_size=2048,
+               no_search=False, full_utt=False):
+        """Decode a raw audio file as one utterance and return ``self``.
+
+        The file (``self.goforward`` when ``audio_file`` is None) is read
+        in chunks of at most ``buffer_size`` bytes; each chunk is passed
+        to ``process_raw`` together with ``no_search`` and ``full_utt``.
+        """
+        buf = bytearray(buffer_size)
+        with open(audio_file or self.goforward, 'rb') as f:
+            with self.start_utterance():
+                while True:
+                    size = f.readinto(buf)
+                    if not size:
+                        break
+                    # A short read (normally the last chunk) leaves stale
+                    # bytes from the previous read in the tail of buf, so
+                    # pass only the bytes that were actually read.
+                    self.process_raw(buf[:size], no_search, full_utt)
+        return self
+
+    def segments(self, detailed=False):
+        """Return the words of the current segmentation.
+
+        With ``detailed`` true, return ``(word, prob, start_frame,
+        end_frame)`` tuples instead of bare words.
+        """
+        if detailed:
+            return [
+                (s.word, s.prob, s.start_frame, s.end_frame)
+                for s in self.seg()
+            ]
+        else:
+            return [s.word for s in self.seg()]
 
     def hypothesis(self):
         hyp = self.hyp()
         if hyp:
             return hyp.hypstr
+        else:
+            return ''
 
     def probability(self):
         hyp = self.hyp()
@@ -151,35 +171,143 @@ def confidence(self):
             return self.get_logmath().exp(hyp.prob)
 
 
-class Continuous(Pocketsphinx):
+class AudioFile(Pocketsphinx):
+    """Decoder that iterates over the phrases found in a raw audio file.
+
+    The ``audio_file``, ``buffer_size``, ``no_search`` and ``full_utt``
+    keyword arguments are consumed here; ``keyphrase`` is read but left
+    in place, and all remaining arguments go to ``Pocketsphinx``.
+    """
 
     def __init__(self, **kwargs):
-        audio = kwargs.pop('audio', None)
-        super(Continuous, self).__init__(**kwargs)
-        self.stream = open(audio or self.goforward, 'rb')
+        try:
+            signal.signal(signal.SIGINT, self.stop)
+        except ValueError:
+            # signal.signal() only works in the main thread; when built
+            # anywhere else, go without the Ctrl+C handler rather than
+            # fail to construct the decoder.
+            pass
+
+        self.audio_file = kwargs.pop('audio_file', None)
+        self.buffer_size = kwargs.pop('buffer_size', 2048)
+        self.no_search = kwargs.pop('no_search', False)
+        self.full_utt = kwargs.pop('full_utt', False)
+
+        self.keyphrase = kwargs.get('keyphrase')
+
         self.in_speech = False
-        self.start_utt()
+        self.buf = bytearray(self.buffer_size)
+
+        super(AudioFile, self).__init__(**kwargs)
+
+        self.f = open(self.audio_file or self.goforward, 'rb')
 
     def __iter__(self):
-        return self
+        """Yield ``self`` each time the decoder has a phrase ready.
+
+        With a keyphrase configured this is whenever a hypothesis
+        exists; otherwise it is when speech stops and a hypothesis
+        exists.  The utterance is ended while the caller handles the
+        result and a new one is started afterwards.
+        """
+        with self.f:
+            with self.start_utterance():
+                while True:
+                    size = self.f.readinto(self.buf)
+                    if not size:
+                        break
+                    # A short read (normally the last chunk) leaves stale
+                    # bytes in the tail of self.buf, so pass only what
+                    # was actually read.
+                    self.process_raw(
+                        self.buf[:size], self.no_search, self.full_utt)
+                    if self.keyphrase and self.hyp():
+                        with self.end_utterance():
+                            yield self
+                    elif self.in_speech != self.get_in_speech():
+                        self.in_speech = self.get_in_speech()
+                        if not self.in_speech and self.hyp():
+                            with self.end_utterance():
+                                yield self
 
-    def __next__(self):
-        while True:
-            buf = self.stream.read(1024)
-            if buf:
-                self.process_raw(buf, False, False)
-                if self.get_in_speech() != self.in_speech:
-                    self.in_speech = self.get_in_speech()
-                    if not self.in_speech:
-                        self.end_utt()
-                        phrase = self.phrase()
-                        if phrase:
-                            return phrase
-                        self.start_utt()
-                continue
-            else:
-                self.stream.close()
-                raise StopIteration
+    def stop(self, *args, **kwargs):
+        """Signal handler installed for SIGINT: raise StopIteration.
+
+        NOTE(review): under PEP 479 (on by default since Python 3.7) a
+        StopIteration raised while the generator made by __iter__ is
+        running surfaces as RuntimeError -- confirm target versions.
+        """
+        raise StopIteration
+
+
+class LiveSpeech(Pocketsphinx):
+    """Decoder that iterates over the phrases heard on an audio device.
+
+    The ``audio_device``, ``sampling_rate``, ``buffer_size``,
+    ``no_search`` and ``full_utt`` keyword arguments are consumed here;
+    ``keyphrase`` is read but left in place, and all remaining arguments
+    go to ``Pocketsphinx``.
+    """
 
-    def next(self):
-        return self.__next__()
+    def __init__(self, **kwargs):
+        try:
+            signal.signal(signal.SIGINT, self.stop)
+        except ValueError:
+            # signal.signal() only works in the main thread; when built
+            # anywhere else, go without the Ctrl+C handler rather than
+            # fail to construct the decoder.
+            pass
+
+        self.audio_device = kwargs.pop('audio_device', None)
+        self.sampling_rate = kwargs.pop('sampling_rate', 16000)
+        self.buffer_size = kwargs.pop('buffer_size', 2048)
+        self.no_search = kwargs.pop('no_search', False)
+        self.full_utt = kwargs.pop('full_utt', False)
+
+        self.keyphrase = kwargs.get('keyphrase')
+
+        self.in_speech = False
+        self.buf = bytearray(self.buffer_size)
+        self.ad = Ad(self.audio_device, self.sampling_rate)
+
+        super(LiveSpeech, self).__init__(**kwargs)
+
+    def __iter__(self):
+        """Yield ``self`` each time the decoder has a phrase ready.
+
+        With a keyphrase configured this is whenever a hypothesis
+        exists; otherwise it is when speech stops and a hypothesis
+        exists.  The utterance is ended while the caller handles the
+        result and a new one is started afterwards.
+        """
+        with self.ad:
+            with self.start_utterance():
+                while True:
+                    samples = self.ad.readinto(self.buf)
+                    if samples < 0:
+                        break
+                    if not samples:
+                        # Nothing new was captured; self.buf still holds
+                        # audio that has already been decoded.
+                        continue
+                    # ad_read() reports 16-bit samples (2 bytes each);
+                    # pass only the freshly recorded part of the buffer.
+                    self.process_raw(
+                        self.buf[:samples * 2], self.no_search, self.full_utt)
+                    if self.keyphrase and self.hyp():
+                        with self.end_utterance():
+                            yield self
+                    elif self.in_speech != self.get_in_speech():
+                        self.in_speech = self.get_in_speech()
+                        if not self.in_speech and self.hyp():
+                            with self.end_utterance():
+                                yield self
+
+    def stop(self, *args, **kwargs):
+        """Signal handler installed for SIGINT: raise StopIteration.
+
+        NOTE(review): under PEP 479 (on by default since Python 3.7) a
+        StopIteration raised while the generator made by __iter__ is
+        running surfaces as RuntimeError -- confirm target versions.
+        """
+        raise StopIteration
diff --git a/setup.py b/setup.py
@@ -168,6 +168,7 @@
         'Development Status :: 2 - Pre-Alpha',
         'Operating System :: Microsoft :: Windows',
         'Operating System :: POSIX :: Linux',
+        'Operating System :: MacOS',
         'License :: OSI Approved :: BSD License',
         'Programming Language :: Python :: 2',
         'Programming Language :: Python :: 2.7',
diff --git a/swig/sphinxbase/ad.i b/swig/sphinxbase/ad.i
@@ -56,22 +56,20 @@ negative error code."
 %include pybuffer.i
 %include typemaps.i
 
-%begin %{
-    #include <Python.h>
-    #include <sphinxbase/ad.h>
-
-    typedef ad_rec_t Ad;
+%{
+#include <sphinxbase/ad.h>
+typedef ad_rec_t Ad;
 %}
 
 typedef struct {} Ad;
 
 %extend Ad {
-    Ad(const char *device=NULL, int32 rate=16000, int *errcode) {
+    Ad(const char *audio_device=NULL, int sampling_rate=16000, int *errcode) {
         Ad *ad;
-        if (device == NULL)
-            ad = ad_open_sps(rate);
+        if (audio_device == NULL)
+            ad = ad_open_sps(sampling_rate);
         else
-            ad = ad_open_dev(device, rate);
+            ad = ad_open_dev(audio_device, sampling_rate);
         *errcode = ad ? 0 : -1;
         return ad;
     }
@@ -80,27 +78,27 @@ typedef struct {} Ad;
         ad_close($self);
     }
 
-    Ad * __enter__() {
-        ad_start_rec($self);
+    Ad *__enter__(int *errcode) {
+        *errcode = ad_start_rec($self);
         return $self;
     }
 
-    void __exit__() {
-        ad_stop_rec($self);
+    void __exit__(PyObject *exception_type, PyObject *exception_value,
+                  PyObject *exception_traceback, int *errcode) {
+        *errcode = ad_stop_rec($self);
     }
 
-    int start_rec(int *errcode) {
+    int start_recording(int *errcode) {
         return *errcode = ad_start_rec($self);
     }
 
-    int stop_rec(int *errcode) {
+    int stop_recording(int *errcode) {
         return *errcode = ad_stop_rec($self);
     }
 
     %include <pybuffer.i>
-    %pybuffer_mutable_binary(char *SDATA, size_t NSAMP);
-    int read(char *SDATA, size_t NSAMP, int *errcode) {
-        NSAMP /= sizeof(int16);
-        return *errcode = ad_read($self, (int16 *)SDATA, NSAMP);
+    %pybuffer_mutable_binary(char *DATA, size_t SIZE);
+    int readinto(char *DATA, size_t SIZE, int *errcode) {
+        return *errcode = ad_read($self, (int16*)DATA, SIZE /= sizeof(int16));
     }
 }
diff --git a/tests/test_audiofile.py b/tests/test_audiofile.py
@@ -29,13 +29,13 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 from unittest import TestCase
-from pocketsphinx import Continuous
+from pocketsphinx import AudioFile
 
 
-class TestContinuous(TestCase):
+class TestAudioFile(TestCase):
 
-    def test_continuous(self):
-        phrase = ''
-        for c in Continuous():
-            phrase = c.phrase
-        self.assertEqual(phrase, 'go forward ten meters')
+    def test_audiofile(self):
+        hypothesis = ''
+        for phrase in AudioFile():
+            hypothesis = str(phrase)
+        self.assertEqual(hypothesis, 'go forward ten meters')
diff --git a/tests/test_decoder.py b/tests/test_decoder.py
@@ -73,11 +73,10 @@ class TestCepDecoder(TestCase):
     def test_cep_decoder_hypothesis(self):
         ps = Pocketsphinx()
         with open('deps/pocketsphinx/test/data/goforward.mfc', 'rb') as f:
-            f.read(4)
-            buf = f.read(13780)
-            ps.start_utt()
-            ps.process_cep(buf, False, True)
-            ps.end_utt()
+            with ps.start_utterance():
+                f.read(4)
+                buf = f.read(13780)
+                ps.process_cep(buf, False, True)
         self.assertEqual(ps.hypothesis(), 'go forward ten meters')
         self.assertEqual(ps.score(), -7095)
         self.assertEqual(ps.probability(), -32715)
diff --git a/tests/test_kws.py b/tests/test_kws.py
@@ -29,19 +29,14 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 from unittest import TestCase
-from pocketsphinx import Pocketsphinx
+from pocketsphinx import AudioFile
 
 
 class TestKws(TestCase):
 
     def test_kws(self):
-        def keyphrase(k):
-            k.keyphrase = [
-                (s.word, s.prob, s.start_frame, s.end_frame)
-                for s in k.seg()
-            ]
-
-        ps = Pocketsphinx(lm=False, keyphrase='forward', kws_threshold=1e+20)
-        ps.decode(callback=keyphrase)
-
-        self.assertEqual(ps.keyphrase, [('forward', -617, 63, 121)])
+        segments = []
+        for phrase in AudioFile(lm=False, keyphrase='forward',
+                                kws_threshold=1e+20):
+            segments = phrase.segments(detailed=True)
+        self.assertEqual(segments, [('forward', -617, 63, 121)])