Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ jobs:
- name: Check Python2
run: |
apt update -q && apt install -y -q python2
python2 -m compileall -x 'voicevox/' .
python2 -m compileall -x '(voicevox|emotion_analyzer)/' .
1 change: 1 addition & 0 deletions emotion_analyzer/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requirements.txt
48 changes: 48 additions & 0 deletions emotion_analyzer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# 3.0.2 is the catkin baseline shipped with ROS1 Noetic (the README targets
# Noetic) and avoids the deprecation warnings for <2.8.12 minimums.
cmake_minimum_required(VERSION 3.0.2)
project(emotion_analyzer)

find_package(catkin REQUIRED COMPONENTS
  catkin_virtualenv
  std_msgs
  message_generation
)

# Installs the python package declared in setup.py (emotion_analyzer.*).
catkin_python_setup()

add_service_files(
  FILES
  AnalyzeText.srv
  AnalyzeAudio.srv
)

generate_messages(
  DEPENDENCIES
  std_msgs
)

catkin_package(
  CATKIN_DEPENDS rospy std_msgs message_runtime
)

# The Hume AI client requires python >= 3.8; without it, skip the
# virtualenv and script installation but keep the message/service
# generation above so downstream packages still build.
find_package(Python3 3.8 QUIET COMPONENTS Interpreter)
if(NOT Python3_FOUND)
  message(WARNING "emotion_analyzer (Hume AI) requires python3.8 or newer")
  return()
endif()
message(STATUS "Found Python: ${Python3_EXECUTABLE}")
message(STATUS "Python Version: ${Python3_VERSION}")

# Lock requirements.in into requirements.txt (gitignored) and build the
# virtualenv the node scripts run inside.  CHECK_VENV FALSE skips the
# venv verification step at build time.
catkin_generate_virtualenv(
  INPUT_REQUIREMENTS requirements.in
  PYTHON_INTERPRETER python3
  CHECK_VENV FALSE
)

# List scripts explicitly instead of file(GLOB ...): a glob silently
# misses files added after the first configure.
catkin_install_python(PROGRAMS
  scripts/analyze_audio_service.py
  scripts/analyze_text_service.py
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)

install(DIRECTORY launch
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
)
64 changes: 64 additions & 0 deletions emotion_analyzer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Emotion Analyzer Service using Hume API (ROS1)

This ROS1 package provides a service to analyze emotions from a given text using the [Hume AI](https://www.hume.ai/) API.

## Requirements

- ROS1 Noetic
- Python 3.8+
- An API key from Hume AI

## Installation

Clone this repository and move to this directory

``` bash
rosdep install -iry --from-paths .
catkin build --this
```

then source your workspace

## Usage (Quick)

Using your microphone

``` bash
roslaunch emotion_analyzer sample_emotion_analyzer.launch api_key:=<your_api_key>
```


## Usage

### 1. Launch the emotion analyzer
```bash
roslaunch emotion_analyzer emotion_analyzer.launch api_key:=<your_api_key>
```

### 2. Call the service
For text,
```bash
rosservice call /analyze_text "text: '<text you want to analyze>'"
```
For prepared audio (up to 5 seconds),
```bash
rosservice call /analyze_audio "audio_file: <audio_file_path>"
```
As a sample, you can use the bundled audio file `data/purugacha_short.wav` in this package as `<audio_file_path>` (give its absolute path in your workspace, e.g. `<your_catkin_ws>/src/jsk_3rdparty/emotion_analyzer/data/purugacha_short.wav`).

For audio from microphone,
```bash
roslaunch audio_capture capture.launch format:=wave
rosservice call /analyze_audio "audio_file: ''"
```
You can check the device information by `arecord -l`.
Sometimes you need to replace "hw" with "plughw":
for example, `roslaunch audio_capture capture.launch format:=wave device:=plughw:1,0`.
When the device is busy, you can try `fuser -v /dev/snd/*` to get PID and kill it by `kill -9 <PID>`.







Binary file added emotion_analyzer/data/purugacha.wav
Binary file not shown.
Binary file added emotion_analyzer/data/purugacha_short.wav
Binary file not shown.
14 changes: 14 additions & 0 deletions emotion_analyzer/launch/emotion_analyzer.launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<launch>
<!-- check the audio topic -->
<arg name="input_audio" default="/audio/audio" />

<!-- set API key from the command line -->
<arg name="api_key" default="" />

<!-- Global parameter read by both service nodes via rospy.get_param("hume_api_key"). -->
<param name="hume_api_key" value="$(arg api_key)" />

<!-- Provides the /analyze_text service (emotion_analyzer/AnalyzeText). -->
<node name="analyze_text_service" pkg="emotion_analyzer" type="analyze_text_service.py" output="screen" />
<!-- Provides the /analyze_audio service; ~audio is the topic it buffers
     when the request does not supply an audio file path. -->
<node name="analyze_audio_service" pkg="emotion_analyzer" type="analyze_audio_service.py" output="screen" >
<remap from="~audio" to="$(arg input_audio)" />
</node>
</launch>
14 changes: 14 additions & 0 deletions emotion_analyzer/launch/sample_emotion_analyzer.launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<launch>
<!-- Hume AI API key, forwarded to emotion_analyzer.launch via pass_all_args. -->
<arg name="api_key" default="" />

<!-- Start microphone capture; the analyzer expects the "wave" coding format. -->
<include file="$(find audio_capture)/launch/capture.launch">
<arg name="format" default="wave" />
</include>

<!-- Start both analysis services, wired to the capture topic. -->
<include file="$(find emotion_analyzer)/launch/emotion_analyzer.launch"
pass_all_args="true" >
<arg name="input_audio" value="/audio/audio" />
</include>

</launch>
30 changes: 30 additions & 0 deletions emotion_analyzer/package.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?xml version="1.0"?>
<package format="2">
  <name>emotion_analyzer</name>
  <version>0.0.0</version>
  <description>The emotion_analyzer package</description>

  <maintainer email="k-okada@jsk.imi.i.u-tokyo.ac.jp">Kei Okada</maintainer>
  <maintainer email="obinata@jsk.imi.i.u-tokyo.ac.jp">Yoshiki Obinata</maintainer>

  <license>BSD</license>

  <author email="nagata@jsk.imi.i.u-tokyo.ac.jp">Ayaha Nagata</author>

  <buildtool_depend>catkin</buildtool_depend>

  <build_depend>catkin_virtualenv</build_depend>
  <build_depend>message_generation</build_depend>

  <exec_depend>audio_capture</exec_depend>
  <!-- audio_common_msgs provides AudioInfo, imported at runtime by
       scripts/analyze_audio_service.py. -->
  <exec_depend>audio_common_msgs</exec_depend>
  <exec_depend>message_runtime</exec_depend>
  <exec_depend>python3-pydub</exec_depend>
  <exec_depend>python3-soundfile</exec_depend>
  <exec_depend>rospy</exec_depend>

  <depend>std_msgs</depend>

  <export>
    <!-- requirements.txt is generated from requirements.in by catkin_virtualenv. -->
    <pip_requirements>requirements.txt</pip_requirements>
  </export>
</package>
1 change: 1 addition & 0 deletions emotion_analyzer/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hume[stream]
106 changes: 106 additions & 0 deletions emotion_analyzer/scripts/analyze_audio_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
import rospy
import asyncio
from hume import HumeStreamClient
from hume.models.config import ProsodyConfig, BurstConfig
from emotion_analyzer.srv import AnalyzeAudio, AnalyzeAudioResponse
from emotion_analyzer.utils.audio_buffer import AudioBuffer
import soundfile as sf
from pydub import AudioSegment
from io import BytesIO
from base64 import b64encode
import os
import pprint
import json
from std_msgs.msg import String
from audio_common_msgs.msg import AudioInfo

class AudioServiceNode:
    """ROS service node that classifies emotions in audio via the Hume AI
    streaming API (prosody + vocal burst models).

    Service:
        analyze_audio (emotion_analyzer/AnalyzeAudio):
            request.audio_file -- path to an audio file, or "" to analyze the
                                  most recent window from the subscribed topic
            response.result    -- JSON string {"prosody": ..., "burst": ...}
                                  or {"error": ...} when no audio is available
    """

    def __init__(self):
        # Set by the launch file from the api_key argument.
        self.api_key = rospy.get_param("hume_api_key", None)
        if not self.api_key:  # unset parameter or empty string
            rospy.logerr("API key has not been set")
            exit(1)

        self.client = HumeStreamClient(self.api_key)
        self.config = [BurstConfig(), ProsodyConfig()]
        # Rolling 2-second window of the latest audio from ~audio.
        self.audio_buffer = AudioBuffer(topic_name="~audio",
                                        window_size=2.0,
                                        auto_start=True)
        self.expected_coding_format = "wave"
        self.audio_info_sub = rospy.Subscriber("/audio/audio_info", AudioInfo, self.audio_info_callback)
        rospy.Service("analyze_audio", AnalyzeAudio, self.handle_request)
        rospy.loginfo("Audio-to-Emotion Analysis Service ready.")

    def audio_info_callback(self, msg):
        # Warn when the capture pipeline publishes a coding format other than
        # raw wave, which the buffering/segment code below assumes.
        if msg.coding_format != self.expected_coding_format:
            rospy.logwarn(f"Coding_format mismatch: expected {self.expected_coding_format}, got {msg.coding_format}")

    def handle_request(self, req):
        """Service callback: run the async analysis and return it as JSON."""
        rospy.loginfo("Received request for analysis")
        result = asyncio.run(self.analyze_audio(req.audio_file))
        rospy.loginfo("Finished analysis")
        if isinstance(result, dict):
            result_json = json.dumps(result)
        else:
            result_json = str(result)
        return AnalyzeAudioResponse(result=result_json)

    async def analyze_audio(self, audio_file):
        """Analyze `audio_file` (or the live topic buffer when it is empty).

        Returns a dict with "prosody"/"burst" emotion lists (either may be
        None when the API gives no prediction for that model), or an
        {"error": ...} dict when no audio data was available.
        Raises on audio longer than the 5 s streaming limit.
        """
        if audio_file:
            segment = AudioSegment.from_file(audio_file)
        else:
            samples = self.audio_buffer.read()
            if samples is None or len(samples) == 0:
                rospy.logwarn("Audio data cannot be found. Check the audio topic name.")
                return {"error": "No audio data received."}
            # Rebuild a segment from the raw buffered samples; format
            # parameters come from the buffer's subscribed stream.
            segment = AudioSegment(
                data=samples.tobytes(),
                sample_width=self.audio_buffer.bitdepth // 8,
                channels=self.audio_buffer.n_channel,
                frame_rate=self.audio_buffer.input_sample_rate,
            )
        # The streaming API accepts at most ~5 s of audio per message.
        duration_ms = len(segment)
        if duration_ms > 5000:
            raise Exception(f"Audio is too long: audio length = {duration_ms}ms")

        # Re-encode as WAV and base64-encode for the streaming socket.
        buf = BytesIO()
        segment.export(buf, format="wav")
        b64_audio_str = b64encode(buf.getvalue()).decode("utf-8")

        async with self.client.connect(self.config) as socket:
            result = await socket.send_bytes(b64_audio_str.encode("utf-8"))
            pprint.pprint(result)

            if result and isinstance(result, dict):
                # Guard against missing/empty prediction lists so an API
                # error payload does not raise IndexError.
                prosody_preds = result.get('prosody', {}).get('predictions') or []
                burst_preds = result.get('burst', {}).get('predictions') or []
                if not prosody_preds and not burst_preds:
                    rospy.logwarn("No predictions found in the result.")
                return {
                    "prosody": prosody_preds[0]['emotions'] if prosody_preds else None,
                    "burst": burst_preds[0]['emotions'] if burst_preds else None,
                }
            rospy.logerr("Error in receiving valid result.")
            return {"prosody": None, "burst": None}

if __name__ == "__main__":
    # Register with the ROS master before constructing the node,
    # then hand control to the callback loop.
    rospy.init_node("analyze_audio_service_node")
    node = AudioServiceNode()  # keeps services/subscribers alive while spinning
    rospy.spin()
40 changes: 40 additions & 0 deletions emotion_analyzer/scripts/analyze_text_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
import rospy
import asyncio
from hume import HumeStreamClient
from hume.models.config import LanguageConfig
from emotion_analyzer.srv import AnalyzeText, AnalyzeTextResponse
import pprint
import json

class TextServiceNode:
    """ROS service node that classifies emotions in text via the Hume AI
    streaming API language model.

    Service:
        analyze_text (emotion_analyzer/AnalyzeText):
            request.text    -- text to analyze
            response.result -- JSON string {"emotions": [...]} (None when the
                               API returns no prediction)
    """

    def __init__(self):
        # Set by the launch file from the api_key argument.
        self.api_key = rospy.get_param("hume_api_key", None)
        # Treat the launch file's default empty string the same as an unset
        # parameter (matches AudioServiceNode's check).
        if not self.api_key:
            rospy.logerr("API key has not been set")
            exit(1)

        self.client = HumeStreamClient(self.api_key)
        self.config = LanguageConfig(granularity="sentence")
        #granularity="word": analyze each word / granularity="turn": analyze whole text
        rospy.Service("analyze_text", AnalyzeText, self.handle_request)
        rospy.loginfo("Text-to-Emotion Analysis Service ready.")

    def handle_request(self, req):
        """Service callback: run the async analysis and return it as JSON."""
        rospy.loginfo("Received text for analysis")
        result = asyncio.run(self.analyze_text(req.text))
        rospy.loginfo("Finished analysis")
        result_json = json.dumps(result)
        return AnalyzeTextResponse(result_json)

    async def analyze_text(self, text):
        """Send `text` over the streaming socket; return {"emotions": ...}."""
        async with self.client.connect([self.config]) as socket:
            result = await socket.send_text(text)
            pprint.pprint(result)
            # Guard against API error payloads that carry no predictions,
            # which would otherwise raise KeyError/IndexError.
            predictions = result.get("language", {}).get("predictions") or []
            if not predictions:
                rospy.logwarn("No predictions found in the result.")
                return {"emotions": None}
            return {"emotions": predictions[0]["emotions"]}  # return dict

if __name__ == "__main__":
    # Register with the ROS master before constructing the node,
    # then hand control to the callback loop.
    rospy.init_node("analyze_text_service_node")
    node = TextServiceNode()  # keeps the service alive while spinning
    rospy.spin()
11 changes: 11 additions & 0 deletions emotion_analyzer/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# catkin_python_setup() in CMakeLists.txt invokes this at build time.

from distutils.core import setup
from catkin_pkg.python_setup import generate_distutils_setup

# Pull name/version/maintainer from package.xml; only the python layout
# is declared here.
d = generate_distutils_setup(
    # distutils does not install subpackages implicitly, so
    # emotion_analyzer.utils (imported by scripts/analyze_audio_service.py
    # as emotion_analyzer.utils.audio_buffer) must be listed explicitly.
    packages=['emotion_analyzer', 'emotion_analyzer.utils'],
    package_dir={'': 'src'}
)

setup(**d)
Empty file.
Loading
Loading