Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ jobs:
- name: Check Python2
run: |
apt update -q && apt install -y -q python2
python2 -m compileall -x 'voicevox/' .
python2 -m compileall -x '(voicevox|emotion_analyzer)/' .
1 change: 1 addition & 0 deletions emotion_analyzer/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requirements.txt
48 changes: 48 additions & 0 deletions emotion_analyzer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# 3.0.2 is the catkin baseline shipped with ROS1 Noetic (the README targets
# Noetic) and avoids the deprecation warnings for <2.8.12 minimums.
cmake_minimum_required(VERSION 3.0.2)
project(emotion_analyzer)

find_package(catkin REQUIRED COMPONENTS
  catkin_virtualenv
  std_msgs
  message_generation
)

# Installs the python package declared in setup.py (emotion_analyzer.*).
catkin_python_setup()

add_service_files(
  FILES
  AnalyzeText.srv
  AnalyzeAudio.srv
)

generate_messages(
  DEPENDENCIES
  std_msgs
)

catkin_package(
  CATKIN_DEPENDS rospy std_msgs message_runtime
)

# The Hume AI client requires python >= 3.8; without it, skip the
# virtualenv and script installation but keep the message/service
# generation above so downstream packages still build.
find_package(Python3 3.8 QUIET COMPONENTS Interpreter)
if(NOT Python3_FOUND)
  message(WARNING "emotion_analyzer (Hume AI) requires python3.8 or newer")
  return()
endif()
message(STATUS "Found Python: ${Python3_EXECUTABLE}")
message(STATUS "Python Version: ${Python3_VERSION}")

# Lock requirements.in into requirements.txt (gitignored) and build the
# virtualenv the node scripts run inside.  CHECK_VENV FALSE skips the
# venv verification step at build time.
catkin_generate_virtualenv(
  INPUT_REQUIREMENTS requirements.in
  PYTHON_INTERPRETER python3
  CHECK_VENV FALSE
)

# List scripts explicitly instead of file(GLOB ...): a glob silently
# misses files added after the first configure.
catkin_install_python(PROGRAMS
  scripts/analyze_audio_service.py
  scripts/analyze_text_service.py
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)

install(DIRECTORY launch
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
)
64 changes: 64 additions & 0 deletions emotion_analyzer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Emotion Analyzer Service using Hume API (ROS1)

This ROS1 package provides a service to analyze emotions from a given text using the [Hume AI](https://www.hume.ai/) API.

## Requirements

- ROS1 Noetic
- Python 3.8+
- An API key from Hume AI

## Installation

Clone this repository and move to this directory

``` bash
rosdep install -iry --from-paths .
catkin build --this
```

then source your workspace

## Usage (Quick)

Using your microphone

``` bash
roslaunch emotion_analyzer sample_emotion_analyzer.launch api_key:=<your_api_key>
```


## Usage

### 1. Launch the emotion analyzer
```bash
roslaunch emotion_analyzer emotion_analyzer.launch api_key:=<your_api_key>
```

### 2. Call the service
For text,
```bash
rosservice call /analyze_text "text: '<text you want to analyze>'"
```
For prepared audio (up to 5 seconds),
```bash
rosservice call /analyze_audio "audio_file: <audio_file_path>"
```
As a sample, you can use the bundled audio file `data/purugacha_short.wav` in this package as `<audio_file_path>` (give its absolute path in your workspace, e.g. `<your_catkin_ws>/src/jsk_3rdparty/emotion_analyzer/data/purugacha_short.wav`).

For audio from microphone,
```bash
roslaunch audio_capture capture.launch format:=wave
rosservice call /analyze_audio "audio_file: ''"
```
You can check the device information by `arecord -l`.
Sometimes you need to replace "hw" with "plughw":
for example, `roslaunch audio_capture capture.launch format:=wave device:=plughw:1,0`.
When the device is busy, you can try `fuser -v /dev/snd/*` to get PID and kill it by `kill -9 <PID>`.







Binary file added emotion_analyzer/data/purugacha.wav
Binary file not shown.
Binary file added emotion_analyzer/data/purugacha_short.wav
Binary file not shown.
14 changes: 14 additions & 0 deletions emotion_analyzer/launch/emotion_analyzer.launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<launch>
<!-- check the audio topic -->
<arg name="input_audio" default="/audio/audio" />

<!-- set API key from the command line -->
<arg name="api_key" default="" />

<!-- Global parameter read by both service nodes via rospy.get_param("hume_api_key"). -->
<param name="hume_api_key" value="$(arg api_key)" />

<!-- Provides the /analyze_text service (emotion_analyzer/AnalyzeText). -->
<node name="analyze_text_service" pkg="emotion_analyzer" type="analyze_text_service.py" output="screen" />
<!-- Provides the /analyze_audio service; ~audio is the topic it buffers
     when the request does not supply an audio file path. -->
<node name="analyze_audio_service" pkg="emotion_analyzer" type="analyze_audio_service.py" output="screen" >
<remap from="~audio" to="$(arg input_audio)" />
</node>
</launch>
14 changes: 14 additions & 0 deletions emotion_analyzer/launch/sample_emotion_analyzer.launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<launch>
<!-- Hume AI API key, forwarded to emotion_analyzer.launch via pass_all_args. -->
<arg name="api_key" default="" />

<!-- Start microphone capture; the analyzer expects the "wave" coding format. -->
<include file="$(find audio_capture)/launch/capture.launch">
<arg name="format" default="wave" />
</include>

<!-- Start both analysis services, wired to the capture topic. -->
<include file="$(find emotion_analyzer)/launch/emotion_analyzer.launch"
pass_all_args="true" >
<arg name="input_audio" value="/audio/audio" />
</include>

</launch>
30 changes: 30 additions & 0 deletions emotion_analyzer/package.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?xml version="1.0"?>
<package format="2">
  <name>emotion_analyzer</name>
  <version>0.0.0</version>
  <description>The emotion_analyzer package</description>

  <maintainer email="k-okada@jsk.imi.i.u-tokyo.ac.jp">Kei Okada</maintainer>
  <maintainer email="obinata@jsk.imi.i.u-tokyo.ac.jp">Yoshiki Obinata</maintainer>

  <license>BSD</license>

  <author email="nagata@jsk.imi.i.u-tokyo.ac.jp">Ayaha Nagata</author>

  <buildtool_depend>catkin</buildtool_depend>

  <build_depend>catkin_virtualenv</build_depend>
  <build_depend>message_generation</build_depend>

  <exec_depend>audio_capture</exec_depend>
  <!-- audio_common_msgs provides AudioInfo, imported at runtime by
       scripts/analyze_audio_service.py. -->
  <exec_depend>audio_common_msgs</exec_depend>
  <exec_depend>message_runtime</exec_depend>
  <exec_depend>python3-pydub</exec_depend>
  <exec_depend>python3-soundfile</exec_depend>
  <exec_depend>rospy</exec_depend>

  <depend>std_msgs</depend>

  <export>
    <!-- requirements.txt is generated from requirements.in by catkin_virtualenv. -->
    <pip_requirements>requirements.txt</pip_requirements>
  </export>
</package>
1 change: 1 addition & 0 deletions emotion_analyzer/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hume[stream]
106 changes: 106 additions & 0 deletions emotion_analyzer/scripts/analyze_audio_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
import rospy
import asyncio
from hume import HumeStreamClient
from hume.models.config import ProsodyConfig, BurstConfig
from emotion_analyzer.srv import AnalyzeAudio, AnalyzeAudioResponse
from emotion_analyzer.utils.audio_buffer import AudioBuffer
import soundfile as sf
from pydub import AudioSegment
from io import BytesIO
from base64 import b64encode
import os
import pprint
import json
from std_msgs.msg import String
from audio_common_msgs.msg import AudioInfo

class AudioServiceNode:
    """ROS service node that classifies emotions in audio via the Hume AI
    streaming API (prosody + vocal burst models).

    Service:
        analyze_audio (emotion_analyzer/AnalyzeAudio):
            request.audio_file -- path to an audio file, or "" to analyze the
                                  most recent window from the subscribed topic
            response.result    -- JSON string {"prosody": ..., "burst": ...}
                                  or {"error": ...} when no audio is available
    """

    def __init__(self):
        # Set by the launch file from the api_key argument.
        self.api_key = rospy.get_param("hume_api_key", None)
        if not self.api_key:  # unset parameter or empty string
            rospy.logerr("API key has not been set")
            exit(1)

        self.client = HumeStreamClient(self.api_key)
        self.config = [BurstConfig(), ProsodyConfig()]
        # Rolling 2-second window of the latest audio from ~audio.
        self.audio_buffer = AudioBuffer(topic_name="~audio",
                                        window_size=2.0,
                                        auto_start=True)
        self.expected_coding_format = "wave"
        self.audio_info_sub = rospy.Subscriber("/audio/audio_info", AudioInfo, self.audio_info_callback)
        rospy.Service("analyze_audio", AnalyzeAudio, self.handle_request)
        rospy.loginfo("Audio-to-Emotion Analysis Service ready.")

    def audio_info_callback(self, msg):
        # Warn when the capture pipeline publishes a coding format other than
        # raw wave, which the buffering/segment code below assumes.
        if msg.coding_format != self.expected_coding_format:
            rospy.logwarn(f"Coding_format mismatch: expected {self.expected_coding_format}, got {msg.coding_format}")

    def handle_request(self, req):
        """Service callback: run the async analysis and return it as JSON."""
        rospy.loginfo("Received request for analysis")
        result = asyncio.run(self.analyze_audio(req.audio_file))
        rospy.loginfo("Finished analysis")
        if isinstance(result, dict):
            result_json = json.dumps(result)
        else:
            result_json = str(result)
        return AnalyzeAudioResponse(result=result_json)

    async def analyze_audio(self, audio_file):
        """Analyze `audio_file` (or the live topic buffer when it is empty).

        Returns a dict with "prosody"/"burst" emotion lists (either may be
        None when the API gives no prediction for that model), or an
        {"error": ...} dict when no audio data was available.
        Raises on audio longer than the 5 s streaming limit.
        """
        if audio_file:
            segment = AudioSegment.from_file(audio_file)
        else:
            samples = self.audio_buffer.read()
            if samples is None or len(samples) == 0:
                rospy.logwarn("Audio data cannot be found. Check the audio topic name.")
                return {"error": "No audio data received."}
            # Rebuild a segment from the raw buffered samples; format
            # parameters come from the buffer's subscribed stream.
            segment = AudioSegment(
                data=samples.tobytes(),
                sample_width=self.audio_buffer.bitdepth // 8,
                channels=self.audio_buffer.n_channel,
                frame_rate=self.audio_buffer.input_sample_rate,
            )
        # The streaming API accepts at most ~5 s of audio per message.
        duration_ms = len(segment)
        if duration_ms > 5000:
            raise Exception(f"Audio is too long: audio length = {duration_ms}ms")

        # Re-encode as WAV and base64-encode for the streaming socket.
        buf = BytesIO()
        segment.export(buf, format="wav")
        b64_audio_str = b64encode(buf.getvalue()).decode("utf-8")

        async with self.client.connect(self.config) as socket:
            result = await socket.send_bytes(b64_audio_str.encode("utf-8"))
            pprint.pprint(result)

            if result and isinstance(result, dict):
                # Guard against missing/empty prediction lists so an API
                # error payload does not raise IndexError.
                prosody_preds = result.get('prosody', {}).get('predictions') or []
                burst_preds = result.get('burst', {}).get('predictions') or []
                if not prosody_preds and not burst_preds:
                    rospy.logwarn("No predictions found in the result.")
                return {
                    "prosody": prosody_preds[0]['emotions'] if prosody_preds else None,
                    "burst": burst_preds[0]['emotions'] if burst_preds else None,
                }
            rospy.logerr("Error in receiving valid result.")
            return {"prosody": None, "burst": None}

if __name__ == "__main__":
    # Register with the ROS master before constructing the node,
    # then hand control to the callback loop.
    rospy.init_node("analyze_audio_service_node")
    node = AudioServiceNode()  # keeps services/subscribers alive while spinning
    rospy.spin()
40 changes: 40 additions & 0 deletions emotion_analyzer/scripts/analyze_text_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
import rospy
import asyncio
from hume import HumeStreamClient
from hume.models.config import LanguageConfig
from emotion_analyzer.srv import AnalyzeText, AnalyzeTextResponse
import pprint
import json

class TextServiceNode:
    """ROS service node that classifies emotions in text via the Hume AI
    streaming API language model.

    Service:
        analyze_text (emotion_analyzer/AnalyzeText):
            request.text    -- text to analyze
            response.result -- JSON string {"emotions": [...]} (None when the
                               API returns no prediction)
    """

    def __init__(self):
        # Set by the launch file from the api_key argument.
        self.api_key = rospy.get_param("hume_api_key", None)
        # Treat the launch file's default empty string the same as an unset
        # parameter (matches AudioServiceNode's check).
        if not self.api_key:
            rospy.logerr("API key has not been set")
            exit(1)

        self.client = HumeStreamClient(self.api_key)
        self.config = LanguageConfig(granularity="sentence")
        #granularity="word": analyze each word / granularity="turn": analyze whole text
        rospy.Service("analyze_text", AnalyzeText, self.handle_request)
        rospy.loginfo("Text-to-Emotion Analysis Service ready.")

    def handle_request(self, req):
        """Service callback: run the async analysis and return it as JSON."""
        rospy.loginfo("Received text for analysis")
        result = asyncio.run(self.analyze_text(req.text))
        rospy.loginfo("Finished analysis")
        result_json = json.dumps(result)
        return AnalyzeTextResponse(result_json)

    async def analyze_text(self, text):
        """Send `text` over the streaming socket; return {"emotions": ...}."""
        async with self.client.connect([self.config]) as socket:
            result = await socket.send_text(text)
            pprint.pprint(result)
            # Guard against API error payloads that carry no predictions,
            # which would otherwise raise KeyError/IndexError.
            predictions = result.get("language", {}).get("predictions") or []
            if not predictions:
                rospy.logwarn("No predictions found in the result.")
                return {"emotions": None}
            return {"emotions": predictions[0]["emotions"]}  # return dict

if __name__ == "__main__":
    # Register with the ROS master before constructing the node,
    # then hand control to the callback loop.
    rospy.init_node("analyze_text_service_node")
    node = TextServiceNode()  # keeps the service alive while spinning
    rospy.spin()
11 changes: 11 additions & 0 deletions emotion_analyzer/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# catkin_python_setup() in CMakeLists.txt invokes this at build time.

from distutils.core import setup
from catkin_pkg.python_setup import generate_distutils_setup

# Pull name/version/maintainer from package.xml; only the python layout
# is declared here.
d = generate_distutils_setup(
    # distutils does not install subpackages implicitly, so
    # emotion_analyzer.utils (imported by scripts/analyze_audio_service.py
    # as emotion_analyzer.utils.audio_buffer) must be listed explicitly.
    packages=['emotion_analyzer', 'emotion_analyzer.utils'],
    package_dir={'': 'src'}
)

setup(**d)
Empty file.
Loading
Loading