Skip to content

Commit e73482b

Browse files
added timeout on speech recognition
1 parent 415fb74 commit e73482b

File tree

4 files changed

+37
-13
lines changed

4 files changed

+37
-13
lines changed

CMakeLists.txt

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 2.8.3)
22
project(respeaker_ros)
33

44
find_package(catkin REQUIRED COMPONENTS
5+
rospy
6+
message_generation
57
catkin_virtualenv
68
dynamic_reconfigure)
79
if ($ENV{ROS_PYTHON_VERSION} EQUAL 2)
@@ -12,10 +14,21 @@ else()
1214
"roscd respeaker_ros && pip install -r requirements.txt")
1315
endif()
1416

17+
add_message_files(
18+
DIRECTORY
19+
msg
20+
FILES
21+
StampedAudio.msg
22+
)
23+
24+
generate_messages()
25+
1526
generate_dynamic_reconfigure_options(
1627
cfg/Respeaker.cfg)
1728

18-
catkin_package()
29+
catkin_package(
30+
CATKIN_DEPENDS message_runtime
31+
)
1932

2033
if (catkin_virtualenv_FOUND)
2134
catkin_generate_virtualenv()
@@ -37,4 +50,4 @@ install(FILES requirements.txt
3750
if (CATKIN_ENABLE_TESTING)
3851
catkin_add_nosetests(test/test_installed.py
3952
DEPENDENCIES ${PROJECT_NAME}_generate_virtualenv)
40-
endif()
53+
endif()

package.xml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
<name>respeaker_ros</name>
44
<version>0.0.0</version>
55
<description>The respeaker_ros package</description>
6-
<maintainer email="furushchev@jsk.imi.i.u-tokyo.ac.jp">Yuki Furuta</maintainer>
6+
<maintainer email="zacharykratochvil@gmail.com">Zachary Kratochvil</maintainer>
77
<license>Apache</license>
88
<author email="furushchev@jsk.imi.i.u-tokyo.ac.jp">Yuki Furuta</author>
99

1010
<buildtool_depend>catkin</buildtool_depend>
11+
<build_depend>message_generation</build_depend>
12+
1113
<depend condition="$ROS_PYTHON_VERSION == 3">catkin_virtualenv</depend>
1214
<depend>angles</depend>
1315
<depend>audio_common_msgs</depend>
@@ -20,7 +22,11 @@
2022
<depend>speech_recognition_msgs</depend>
2123
<depend>std_msgs</depend>
2224
<depend>tf</depend>
25+
2326
<export>
2427
<pip_requirements>requirements.txt</pip_requirements>
2528
</export>
29+
30+
<exec_depend>message_runtime</exec_depend>
31+
2632
</package>

scripts/respeaker_node.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import sys
1818
import time
1919
import speech_recognition as SR
20-
from audio_common_msgs.msg import AudioData
20+
from respeaker_ros.msg import StampedAudio
2121
from geometry_msgs.msg import PoseStamped
2222
from std_msgs.msg import Bool, Int32, ColorRGBA
2323
from dynamic_reconfigure.server import Server
@@ -340,9 +340,9 @@ def __init__(self):
340340
self.pub_vad = rospy.Publisher("is_speeching", Bool, queue_size=1, latch=True)
341341
self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True)
342342
self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True)
343-
self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10)
344-
self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10)
345-
self.pub_audios = {c:rospy.Publisher('audio/channel%d' % c, AudioData, queue_size=10) for c in self.respeaker_audio.channels}
343+
self.pub_audio = rospy.Publisher("audio", StampedAudio, queue_size=10)
344+
self.pub_speech_audio = rospy.Publisher("speech_audio", StampedAudio, queue_size=2)
345+
self.pub_audios = {c:rospy.Publisher('audio/channel%d' % c, StampedAudio, queue_size=10) for c in self.respeaker_audio.channels}
346346
# init config
347347
self.config = None
348348
self.dyn_srv = Server(RespeakerConfig, self.on_config)
@@ -401,9 +401,9 @@ def on_audio(self, data, channel):
401401
if channel == 0:
402402
self.out.writeframes(data)
403403

404-
self.pub_audios[channel].publish(AudioData(data=data))
404+
self.pub_audios[channel].publish(StampedAudio(data=data, stamp=rospy.get_rostime()))
405405
if channel == self.main_channel:
406-
self.pub_audio.publish(AudioData(data=data))
406+
self.pub_audio.publish(StampedAudio(data=data, stamp=rospy.get_rostime()))
407407
if self.is_speeching:
408408
if len(self.speech_audio_buffer) == 0:
409409
self.speech_audio_buffer = self.speech_prefetch_buffer
@@ -456,7 +456,7 @@ def on_timer(self, event):
456456
rospy.loginfo("Speech detected for %.3f seconds" % duration)
457457
if self.speech_min_duration <= duration < self.speech_max_duration:
458458

459-
self.pub_speech_audio.publish(AudioData(data=list(buf)))
459+
self.pub_speech_audio.publish(StampedAudio(data=list(buf), stamp=rospy.get_rostime()))
460460

461461

462462
if __name__ == '__main__':

scripts/speech_to_text.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import speech_recognition as SR
88

99
from actionlib_msgs.msg import GoalStatus, GoalStatusArray
10-
from audio_common_msgs.msg import AudioData
10+
from respeaker_ros.msg import StampedAudio
1111
from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal
1212
from speech_recognition_msgs.msg import SpeechRecognitionCandidates
1313

@@ -40,8 +40,8 @@ def __init__(self):
4040
self.tts_action = None
4141

4242
self.pub_speech = rospy.Publisher(
43-
"speech_to_text", SpeechRecognitionCandidates, queue_size=1)
44-
self.sub_audio = rospy.Subscriber("speech_audio", AudioData, self.audio_cb)
43+
"speech_to_text", SpeechRecognitionCandidates, queue_size=2)
44+
self.sub_audio = rospy.Subscriber("speech_audio", StampedAudio, self.audio_cb, queue_size=2)
4545

4646
def tts_timer_cb(self, event):
4747
stamp = event.current_real
@@ -63,9 +63,14 @@ def tts_timer_cb(self, event):
6363
self.is_canceling = False
6464

6565
def audio_cb(self, msg):
66+
6667
if self.is_canceling:
6768
rospy.loginfo("Speech is cancelled")
6869
return
70+
if rospy.get_rostime().secs - msg.stamp.secs > 2:
71+
rospy.loginfo("Old speech discarded")
72+
return
73+
6974
data = SR.AudioData(bytes(msg.data), self.sample_rate, self.sample_width)
7075
with open(str(len(msg.data)) + ".wav","wb") as f:
7176
f.write(data.get_wav_data())

0 commit comments

Comments
 (0)