77import usb .core
88import usb .util
99import pyaudio
10+ import wave
1011import math
1112import numpy as np
1213import tf .transformations as T
1516import struct
1617import sys
1718import time
19+ import speech_recognition as SR
1820from audio_common_msgs .msg import AudioData
1921from geometry_msgs .msg import PoseStamped
2022from std_msgs .msg import Bool , Int32 , ColorRGBA
@@ -169,12 +171,14 @@ def read(self, name):
169171 usb .util .CTRL_IN | usb .util .CTRL_TYPE_VENDOR | usb .util .CTRL_RECIPIENT_DEVICE ,
170172 0 , cmd , id , length , self .TIMEOUT )
171173
174+ #response = struct.unpack(b'ii', struct.pack(b'ii',*response))
172175 response = struct .unpack (b'ii' , bytes ([x for x in response ]))
173176
174177 if data [2 ] == 'int' :
175178 result = response [0 ]
176179 else :
177180 result = response [0 ] * (2. ** response [1 ])
181+ #result = response[0]
178182
179183 return result
180184
@@ -224,6 +228,7 @@ def __init__(self, on_audio, channels=None, suppress_error=True):
224228 self .rate = 16000
225229 self .bitwidth = 2
226230 self .bitdepth = 16
231+ self .i = 0
227232
228233 # find device
229234 count = self .pyaudio .get_device_count ()
@@ -266,6 +271,7 @@ def __init__(self, on_audio, channels=None, suppress_error=True):
266271 input_device_index = self .device_index ,
267272 )
268273
274+
269275 def __del__ (self ):
270276 self .stop ()
271277 try :
@@ -279,15 +285,25 @@ def __del__(self):
279285 except :
280286 pass
281287
288+
282289 def stream_callback (self , in_data , frame_count , time_info , status ):
290+
283291 # split channel
284292 data = np .frombuffer (in_data , dtype = np .int16 )
285293 chunk_per_channel = np .math .ceil (len (data ) / self .available_channels )
286294 data = np .reshape (data , (chunk_per_channel , self .available_channels ))
287295 for chan in self .channels :
288- chan_data = data [:, chan ]
296+ chan_data = bytearray (data [:, chan ].tobytes ())
297+
298+ ## save this chunk to file
299+ #self.i += 1
300+ #sound_data = SR.AudioData(chan_data, self.rate, self.bitwidth)
301+ #with open(str(chan) + " one chunck " + str(self.i) + ".wav","wb") as f:
302+ # f.write(sound_data.get_wav_data())
303+
289304 # invoke callback
290- self .on_audio (str (chan_data ), chan )
305+ self .on_audio (chan_data , chan )
306+
291307 return None , pyaudio .paContinue
292308
293309 def start (self ):
@@ -315,7 +331,7 @@ def __init__(self):
315331 #
316332 self .respeaker = RespeakerInterface ()
317333 self .respeaker_audio = RespeakerAudio (self .on_audio , suppress_error = suppress_pyaudio_error )
318- self .speech_audio_buffer = str ()
334+ self .speech_audio_buffer = bytearray ()
319335 self .is_speeching = False
320336 self .speech_stopped = rospy .Time (0 )
321337 self .prev_is_voice = None
@@ -333,16 +349,20 @@ def __init__(self):
333349 # start
334350 self .speech_prefetch_bytes = int (
335351 self .speech_prefetch * self .respeaker_audio .rate * self .respeaker_audio .bitdepth / 8.0 )
336- self .speech_prefetch_buffer = str ()
352+ self .speech_prefetch_buffer = bytearray ()
337353 self .respeaker_audio .start ()
338354 self .info_timer = rospy .Timer (rospy .Duration (1.0 / self .update_rate ),
339355 self .on_timer )
340356 self .timer_led = None
341357 self .sub_led = rospy .Subscriber ("status_led" , ColorRGBA , self .on_status_led )
358+ self .big_data0 = []
359+ self .out = wave .open ("/home/pi/Desktop/test.wav" , 'wb' )
360+ self .out .setparams ((1 , 2 , 16000 , 1024 , "NONE" , "NONE" ))
342361
343362 def on_shutdown (self ):
344363 try :
345364 self .respeaker .close ()
365+ self .out .close ()
346366 except :
347367 pass
348368 finally :
@@ -377,15 +397,19 @@ def on_status_led(self, msg):
377397 oneshot = True )
378398
379399 def on_audio (self , data , channel ):
400+
401+ if channel == 0 :
402+ self .out .writeframes (data )
403+
380404 self .pub_audios [channel ].publish (AudioData (data = data ))
381405 if channel == self .main_channel :
382406 self .pub_audio .publish (AudioData (data = data ))
383407 if self .is_speeching :
384408 if len (self .speech_audio_buffer ) == 0 :
385409 self .speech_audio_buffer = self .speech_prefetch_buffer
386- self .speech_audio_buffer += data
410+ for x in data : self .speech_audio_buffer += bytearray ([ x ])
387411 else :
388- self .speech_prefetch_buffer += data
412+ for x in data : self .speech_prefetch_buffer += bytearray ([ x ])
389413 self .speech_prefetch_buffer = self .speech_prefetch_buffer [- self .speech_prefetch_bytes :]
390414
391415 def on_timer (self , event ):
@@ -427,12 +451,12 @@ def on_timer(self, event):
427451 buf = self .speech_audio_buffer
428452 self .speech_audio_buffer = str ()
429453 self .is_speeching = False
430- duration = 8.0 * len (buf ) * self .respeaker_audio .bitwidth
454+ duration = len (buf ) * self .respeaker_audio .bitwidth * 8.0
431455 duration = duration / self .respeaker_audio .rate / self .respeaker_audio .bitdepth
432456 rospy .loginfo ("Speech detected for %.3f seconds" % duration )
433457 if self .speech_min_duration <= duration < self .speech_max_duration :
434458
435- self .pub_speech_audio .publish (AudioData (data = bytes (buf , "utf8" )))
459+ self .pub_speech_audio .publish (AudioData (data = list (buf )))
436460
437461
438462if __name__ == '__main__' :
0 commit comments