3030# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131import os
3232import sys
33+ import signal
34+ from contextlib import contextmanager
3335from sphinxbase import *
3436from .pocketsphinx import *
3537
@@ -43,21 +45,10 @@ def get_model_path():
4345
4446
def get_data_path():
    """Return the path to the ``data`` directory.

    The directory is resolved relative to the folder that contains this
    module, so the result follows the package wherever it is installed.
    """
    package_dir = os.path.dirname(__file__)
    return os.path.join(package_dir, 'data')
4850
4951
50- class Phrase (object ):
51-
52- def __init__ (self , phrase , probability , score ):
53- self .phrase = phrase
54- self .probability = probability
55- self .score = score
56-
57- def __str__ (self ):
58- return self .phrase
59-
60-
6152class Pocketsphinx (Decoder ):
6253
6354 def __init__ (self , ** kwargs ):
@@ -98,36 +89,45 @@ def __init__(self, **kwargs):
9889
9990 super (Pocketsphinx , self ).__init__ (config )
10091
101- def decode (self , audio = None , max_samples = 1024 ,
102- no_search = False , full_utt = False , callback = None ):
103- keyphrase = self .get_config ().get_string ('-keyphrase' )
def __str__(self):
    """Return the text of the current hypothesis.

    Delegates to ``self.hypothesis()`` so ``str(decoder)`` and
    ``decoder.hypothesis()`` always agree.
    """
    text = self.hypothesis()
    return text
94+
@contextmanager
def start_utterance(self):
    """Context manager that wraps its body in one utterance.

    Calls ``self.start_utt()`` on entry and ``self.end_utt()`` on exit.
    The exit call sits in a ``finally`` clause so the utterance is closed
    even when the body raises or an enclosing generator is closed early;
    otherwise the decoder would be left with a dangling open utterance.
    """
    self.start_utt()
    try:
        yield
    finally:
        self.end_utt()


@contextmanager
def end_utterance(self):
    """Context manager that suspends the current utterance around its body.

    Calls ``self.end_utt()`` on entry and ``self.start_utt()`` on exit, the
    mirror image of ``start_utterance``. It is meant to be nested inside
    ``start_utterance`` while a finished result is handed to the caller.
    The restart sits in a ``finally`` clause so the enclosing
    ``start_utterance`` always finds an open utterance to end, whatever
    happens in the body.
    """
    self.end_utt()
    try:
        yield
    finally:
        self.start_utt()
123106
124- def segments (self ):
125- return [s .word for s in self .seg ()]
def decode(self, audio_file=None, buffer_size=2048,
           no_search=False, full_utt=False):
    """Feed a raw audio file through the decoder as a single utterance.

    Args:
        audio_file: Path of the file to read in binary mode. When falsy,
            ``self.goforward`` is used instead.
        buffer_size: Size in bytes of the reusable read buffer.
        no_search: Forwarded unchanged to ``self.process_raw``.
        full_utt: Forwarded unchanged to ``self.process_raw``.

    Returns:
        ``self``, so calls can be chained.
    """
    buf = bytearray(buffer_size)
    with open(audio_file or self.goforward, 'rb') as f:
        with self.start_utterance():
            while True:
                count = f.readinto(buf)
                if not count:
                    break
                # The last read is usually shorter than the buffer. Pass
                # only the bytes just read; the tail of ``buf`` still
                # holds data from the previous chunk and must not be
                # decoded a second time.
                self.process_raw(buf[:count], no_search, full_utt)
    return self
115+
def segments(self, detailed=False):
    """List the segments produced by ``self.seg()``.

    Args:
        detailed: When true, each entry is a
            ``(word, prob, start_frame, end_frame)`` tuple; otherwise each
            entry is just the segment's ``word``.

    Returns:
        A list with one entry per segment, in iteration order.
    """
    if not detailed:
        return [segment.word for segment in self.seg()]
    return [
        (segment.word, segment.prob, segment.start_frame, segment.end_frame)
        for segment in self.seg()
    ]
126124
def hypothesis(self):
    """Return the current hypothesis string.

    Returns:
        ``hypstr`` of the object returned by ``self.hyp()``, or an empty
        string when ``self.hyp()`` returns a falsy value.
    """
    best = self.hyp()
    return best.hypstr if best else ''
131131
132132 def probability (self ):
133133 hyp = self .hyp ()
@@ -151,35 +151,75 @@ def confidence(self):
151151 return self .get_logmath ().exp (hyp .prob )
152152
153153
154- class Continuous (Pocketsphinx ):
class AudioFile(Pocketsphinx):
    """Iterable decoder that reads raw audio from a file.

    Iterating over an instance reads the file chunk by chunk and yields the
    instance itself each time a result is ready; query it with the usual
    accessors (``hypothesis()``, ``segments()``, ...) inside the loop.

    Keyword Args:
        audio_file: Path of the file to read; falls back to
            ``self.goforward`` when falsy.
        buffer_size: Size in bytes of the reusable read buffer
            (default 2048).
        no_search: Forwarded to ``process_raw`` (default False).
        full_utt: Forwarded to ``process_raw`` (default False).
        keyphrase: Read here but left in ``kwargs`` so it still reaches
            the base constructor along with every remaining keyword.
    """

    def __init__(self, **kwargs):
        # Set before the handler is installed so ``stop`` can never run
        # against a missing attribute.
        self._stop_requested = False
        try:
            signal.signal(signal.SIGINT, self.stop)
        except ValueError:
            # signal.signal() only works in the main thread; when built
            # elsewhere, skip Ctrl-C handling instead of failing outright.
            pass

        self.audio_file = kwargs.pop('audio_file', None)
        self.buffer_size = kwargs.pop('buffer_size', 2048)
        self.no_search = kwargs.pop('no_search', False)
        self.full_utt = kwargs.pop('full_utt', False)

        self.keyphrase = kwargs.get('keyphrase')

        self.in_speech = False
        self.buf = bytearray(self.buffer_size)

        super(AudioFile, self).__init__(**kwargs)

        self.f = open(self.audio_file or self.goforward, 'rb')

    def __iter__(self):
        """Yield ``self`` for every detected keyphrase or finished phrase.

        The file is closed when iteration ends, whether by end of file, a
        stop request, or an error.
        """
        self._stop_requested = False
        with self.f:
            with self.start_utterance():
                while not self._stop_requested:
                    count = self.f.readinto(self.buf)
                    if not count:
                        break
                    # Pass only the bytes just read: after a short final
                    # read the tail of the buffer still holds the
                    # previous chunk.
                    self.process_raw(
                        self.buf[:count], self.no_search, self.full_utt)
                    if self.keyphrase and self.hyp():
                        with self.end_utterance():
                            yield self
                    else:
                        speaking = self.get_in_speech()
                        if self.in_speech != speaking:
                            self.in_speech = speaking
                            # Speech just ended: hand over the result.
                            if not speaking and self.hyp():
                                with self.end_utterance():
                                    yield self

    def stop(self, *args, **kwargs):
        """Request that iteration end; usable as a SIGINT handler.

        Sets a flag that ``__iter__`` checks before each read. Raising
        ``StopIteration`` here, as before, turns into ``RuntimeError``
        inside a generator on Python 3.7+ (PEP 479).
        """
        self._stop_requested = True
189+
190+
class LiveSpeech(Pocketsphinx):
    """Iterable decoder that reads audio from a capture device.

    Iterating over an instance reads from the device and yields the
    instance itself each time a result is ready; query it with the usual
    accessors (``hypothesis()``, ``segments()``, ...) inside the loop.

    Keyword Args:
        audio_device: Passed to ``Ad`` as its first argument
            (default None).
        sampling_rate: Passed to ``Ad`` as its second argument
            (default 16000).
        buffer_size: Size in bytes of the reusable read buffer
            (default 2048).
        no_search: Forwarded to ``process_raw`` (default False).
        full_utt: Forwarded to ``process_raw`` (default False).
        keyphrase: Read here but left in ``kwargs`` so it still reaches
            the base constructor along with every remaining keyword.
    """

    def __init__(self, **kwargs):
        # Set before the handler is installed so ``stop`` can never run
        # against a missing attribute.
        self._stop_requested = False
        try:
            signal.signal(signal.SIGINT, self.stop)
        except ValueError:
            # signal.signal() only works in the main thread; when built
            # elsewhere, skip Ctrl-C handling instead of failing outright.
            pass

        self.audio_device = kwargs.pop('audio_device', None)
        self.sampling_rate = kwargs.pop('sampling_rate', 16000)
        self.buffer_size = kwargs.pop('buffer_size', 2048)
        self.no_search = kwargs.pop('no_search', False)
        self.full_utt = kwargs.pop('full_utt', False)

        self.keyphrase = kwargs.get('keyphrase')

        self.in_speech = False
        self.buf = bytearray(self.buffer_size)
        self.ad = Ad(self.audio_device, self.sampling_rate)

        super(LiveSpeech, self).__init__(**kwargs)

    def __iter__(self):
        """Yield ``self`` for every detected keyphrase or finished phrase.

        Runs until ``self.ad.readinto`` returns a negative value or
        ``stop`` is called. The ``with self.ad`` block brackets the whole
        capture session.
        """
        self._stop_requested = False
        with self.ad:
            with self.start_utterance():
                # NOTE(review): the whole buffer is processed whatever
                # readinto() returns; the unit of that return value is not
                # visible here -- confirm short reads cannot leave stale
                # data in the tail of the buffer.
                while (not self._stop_requested
                       and self.ad.readinto(self.buf) >= 0):
                    self.process_raw(
                        self.buf, self.no_search, self.full_utt)
                    if self.keyphrase and self.hyp():
                        with self.end_utterance():
                            yield self
                    else:
                        speaking = self.get_in_speech()
                        if self.in_speech != speaking:
                            self.in_speech = speaking
                            # Speech just ended: hand over the result.
                            if not speaking and self.hyp():
                                with self.end_utterance():
                                    yield self

    def stop(self, *args, **kwargs):
        """Request that iteration end; usable as a SIGINT handler.

        Sets a flag that ``__iter__`` checks before each read. Raising
        ``StopIteration`` here, as before, turns into ``RuntimeError``
        inside a generator on Python 3.7+ (PEP 479).
        """
        self._stop_requested = True
0 commit comments