Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion 3rdparty/voicevox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,16 @@ endif()
# Generate the run-voicevox launcher from its .in template into the devel-space
# share/bin directory. @ONLY limits substitution to @VAR@ references, so any
# ${...} text inside the template is left untouched.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/bin/run-voicevox.in
${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox
@ONLY)
# Generate the run-voicevox-docker launcher the same way, next to run-voicevox.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/bin/run-voicevox-docker.in
${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox-docker
@ONLY)


# Install the two Python node scripts under the package share destination's
# node_scripts/ directory.
catkin_install_python(
PROGRAMS node_scripts/request_synthesis.py node_scripts/list_speakers.py
DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts/)
# Install bin/text2wave and the launchers generated above as executable
# programs into the package share destination's bin/ directory.
# NOTE(review): the two PROGRAMS lines below look like the removed and the added
# side of one diff line; presumably only the second (which also lists
# run-voicevox-docker) is meant to remain - confirm against the real file.
install(
PROGRAMS bin/text2wave ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox
PROGRAMS bin/text2wave ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox-docker
DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/bin)

install(DIRECTORY launch
Expand Down
96 changes: 96 additions & 0 deletions 3rdparty/voicevox/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,34 @@

ROS Interface for [VOICEVOX](https://voicevox.hiroshiba.jp/) (AI speech synthesis)

## Quick Start

Choose the deployment pattern that best fits your needs:

```bash
# Pattern A: Direct Docker + ROS client (Multi-machine) | 直接Docker + ROSクライアント(マルチマシン)
# Server machine:
docker run --rm --gpus all -p '50021:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
# Client machine:
roslaunch voicevox voicevox_texttospeech.launch host:=<server_ip> port:=50021

# Pattern B: ROS-managed Docker + ROS client (Multi-machine) | ROS管理Docker + ROSクライアント(マルチマシン)
# Server machine:
roslaunch voicevox voicevox_texttospeech.launch docker_only:=true
# Client machine:
roslaunch voicevox voicevox_texttospeech.launch host:=<server_ip> port:=50021

# Pattern C: All-in-one (Docker + ROS on same machine) | オールインワン(同一マシンでDocker+ROS)
roslaunch voicevox voicevox_texttospeech.launch use_docker:=true

# Pattern D: Local VOICEVOX without Docker (CPU only) | ローカルVOICEVOX(Dockerなし、CPU版)
# VOICEVOX engine is automatically installed during catkin build
roslaunch voicevox voicevox_texttospeech.launch
```

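Pattern A runs the GPU-enabled image (`nvidia-ubuntu20.04-latest` with `--gpus all`). Patterns B and C start the container through the `run-voicevox-docker` script, which launches the CPU image (`cpu-ubuntu20.04-latest`); for GPU acceleration, start the engine as in Pattern A and connect to it with `host:=` / `port:=`. Pattern D runs locally with CPU only.

パターンAはGPU対応イメージ(`nvidia-ubuntu20.04-latest`、`--gpus all`)を使用します。パターンB・Cは`run-voicevox-docker`スクリプト経由でコンテナを起動し、CPU版イメージ(`cpu-ubuntu20.04-latest`)を使用します。GPUで高速化したい場合は、パターンAの方法でエンジンを起動し、`host:=` / `port:=` で接続してください。パターンDはCPUのみでローカル実行します。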

## TERM

[VOICEVOX](https://voicevox.hiroshiba.jp/) is basically free to use, but please check the terms of use below.
Expand Down Expand Up @@ -55,6 +83,74 @@ cd /path/to/catkin_workspace
catkin build voicevox
```

### Optional (Using Docker with GPU acceleration)

VOICEVOX supports Docker deployment with GPU acceleration, which enables significantly faster speech synthesis compared to CPU-only processing.

#### Prerequisites

First, install the NVIDIA Container Toolkit:

https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html

#### Setup

Pull the GPU-enabled VOICEVOX Docker image:

```bash
docker pull voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
```

Start the Docker container with GPU support:

```bash
docker run --rm --gpus all -p '50021:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
```

#### Usage with use_docker option

Launch the ROS node with Docker backend using the `use_docker:=true` option:

```bash
# For local Docker container
roslaunch voicevox voicevox_texttospeech.launch use_docker:=true

# For remote Docker container
roslaunch voicevox voicevox_texttospeech.launch use_docker:=true host:=<Docker PC IP>
```

#### Docker-only mode

If you want to launch only the Docker container without the ROS sound_play node, use the `docker_only:=true` option:

```bash
# Launch Docker container only (local)
roslaunch voicevox voicevox_texttospeech.launch docker_only:=true

# Launch Docker container only with custom port
roslaunch voicevox voicevox_texttospeech.launch docker_only:=true port:=50022
```

#### Connect to existing Docker container

To connect to an existing Docker container running on a different machine:

```bash
# Connect to remote Docker container
roslaunch voicevox voicevox_texttospeech.launch host:=<Docker PC IP> port:=<Docker Port>

# Example: Connect to Docker container at 192.168.1.100:50021
roslaunch voicevox voicevox_texttospeech.launch host:=192.168.1.100 port:=50021
```

The two Docker-related launch options are:
- **use_docker**: Run the Docker container and the ROS node together
- **docker_only**: Launch only the Docker container (useful for server deployment)

Running the VOICEVOX engine in Docker lets you:
- Utilize GPU acceleration for faster speech synthesis (when the container runs a GPU-enabled image)
- Run the VOICEVOX engine in an isolated container environment
- Easily deploy on different machines without a local installation


## Usage

### Launch sound_play with VOICEVOX Text-to-Speech
Expand Down
100 changes: 100 additions & 0 deletions 3rdparty/voicevox/bin/run-voicevox-docker.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/bin/bash
#
# Start (or wait for) a VOICEVOX engine that is served from Docker.
#
# Options (both "--opt VALUE" and "--opt=VALUE" spellings are accepted):
#   --host HOST            engine host (default: localhost)
#   --port PORT            host port mapped to the engine (default: 50021)
#   --cpu_num_threads N    exported to the container as CPU_NUM_THREADS
#                          (default: 1)
#   --                     end of options; everything after it is ignored
# Unrecognised arguments are skipped without an error.
#
# When HOST is localhost or 127.0.0.1 a local container is started, or the
# logs of an already running one are followed. For any other HOST nothing is
# started: the script polls http://HOST:PORT/docs and exits with status 1 once
# the engine stops answering.

# Image used for the local container; the same tag is used for the
# "is it already downloaded?" check, the pull and the run.
engine_image="voicevox/voicevox_engine:cpu-ubuntu20.04-latest"

# Defaults, overridden by the command line below.
host="localhost"
port="50021"
cpu_num_threads="1"

# Abort when option $1 is the last argument and therefore has no value.
# $2 is the number of arguments still unparsed (the option itself included).
# Without this guard "shift 2" would fail without shifting and the parsing
# loop below would never terminate.
need_value() {
    if [[ "$2" -lt 2 ]]; then
        echo "run-voicevox-docker: option $1 requires a value" >&2
        exit 2
    fi
}

# Succeeds when the engine's HTTP endpoint answers within five seconds.
engine_reachable() {
    curl -s --connect-timeout 5 "http://$host:$port/docs" > /dev/null 2>&1
}

# Parse arguments up to the first "--".
while [[ $# -gt 0 ]]; do
    case "$1" in
        --host=*)
            host="${1#*=}"
            shift
            ;;
        --host)
            need_value "$1" "$#"
            host="$2"
            shift 2
            ;;
        --port=*)
            port="${1#*=}"
            shift
            ;;
        --port)
            need_value "$1" "$#"
            port="$2"
            shift 2
            ;;
        --cpu_num_threads=*)
            # The "=" spelling must be handled too, otherwise the value is
            # dropped by the catch-all branch and the default of 1 is used.
            cpu_num_threads="${1#*=}"
            shift
            ;;
        --cpu_num_threads)
            need_value "$1" "$#"
            cpu_num_threads="$2"
            shift 2
            ;;
        --)
            shift
            break
            ;;
        *)
            shift
            ;;
    esac
done

# Check if host is localhost (start local container) or remote (just wait)
if [[ "$host" == "localhost" || "$host" == "127.0.0.1" ]]; then
    # Local Docker container mode
    if ! command -v docker > /dev/null 2>&1; then
        echo "docker command not found; cannot start a local VOICEVOX container" >&2
        exit 1
    fi

    container_name="voicevox-engine-${port}"

    # Compare whole names (-x) as fixed strings (-F): a plain substring grep
    # would let port 5002 match the container that serves port 50021.
    if docker ps --format '{{.Names}}' | grep -Fxq -- "$container_name"; then
        echo "VOICEVOX Docker container is already running on port $port"
        # Stay in the foreground for as long as the existing container logs
        docker logs -f "$container_name"
        exit 0
    fi

    # Remove any existing stopped container with the same name
    docker rm -f "$container_name" 2>/dev/null || true

    # Pull the image only when this exact tag is not available locally
    if ! docker image inspect "$engine_image" > /dev/null 2>&1; then
        echo "Pulling VOICEVOX Docker image..."
        docker pull "$engine_image"
    fi

    # Run VOICEVOX Docker container; exec so that signals sent to this script
    # reach "docker run" directly.
    echo "Starting VOICEVOX Docker container on port $port with $cpu_num_threads CPU threads..."
    exec docker run --rm \
        --name "$container_name" \
        -p "$port:50021" \
        -e CPU_NUM_THREADS="$cpu_num_threads" \
        "$engine_image"
else
    # Remote Docker container mode - just wait for connection
    if ! command -v curl > /dev/null 2>&1; then
        # Without curl the probe below could never succeed.
        echo "curl command not found; cannot check the remote VOICEVOX engine" >&2
        exit 1
    fi

    echo "Connecting to remote VOICEVOX Docker container at $host:$port"
    echo "Make sure the Docker container is running on the remote host with:"
    echo "docker run --rm --gpus all -p '$port:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest"
    echo ""
    echo "Waiting for connection to $host:$port..."

    # Wait for the remote service to be available
    until engine_reachable; do
        echo "Waiting for VOICEVOX engine at $host:$port..."
        sleep 5
    done
    echo "Successfully connected to VOICEVOX engine at $host:$port"

    # Keep the script running and re-check the engine every 30 seconds
    echo "VOICEVOX engine is ready at $host:$port"
    while true; do
        sleep 30
        if ! engine_reachable; then
            echo "Lost connection to VOICEVOX engine at $host:$port"
            exit 1
        fi
    done
fi
21 changes: 18 additions & 3 deletions 3rdparty/voicevox/launch/voicevox_texttospeech.launch
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,30 @@
<!-- Engine port: taken from the VOICEVOX_TEXTTOSPEECH_PORT environment variable when set, otherwise 50021. -->
<arg name="port" default="$(optenv VOICEVOX_TEXTTOSPEECH_PORT 50021)" />
<!-- Forwarded to whichever engine launcher is started below. -->
<arg name="cpu_num_threads" default="1"
doc="Number of cpu threads" />
<!-- Deployment switches; they are only read by the if/unless conditions of the nodes in this file. -->
<arg name="use_docker" default="false"
doc="Use docker or not (default: false)" />
<arg name="docker_only" default="false"
doc="Launch only docker container without sound_play node (default: false)" />
<arg name="remote_only" default="false"
doc="Connect to remote server without starting local server (default: false)" />

<!-- Local engine started through the run-voicevox script.
     NOTE(review): the duplicated args and output attributes below look like the removed and
     added sides of diff lines; confirm only the later one of each pair remains in the real file.
     In the later form the engine is told to listen on 0.0.0.0 and the node is skipped when
     use_docker, docker_only or remote_only is set, or when host is neither localhost nor 127.0.0.1. -->
<node name="voicevox_server"
pkg="voicevox" type="run-voicevox"
args="--voicelib_dir=$(find voicevox)/voicevox_core --host $(arg host) --port $(arg port) --cpu_num_threads=$(arg cpu_num_threads) --load_all_models --"
args="--voicelib_dir=$(find voicevox)/voicevox_core --host 0.0.0.0 --port $(arg port) --cpu_num_threads=$(arg cpu_num_threads) --load_all_models --"
respawn="$(arg sound_play_respawn)"
output="screen" >
output="screen"
unless="$(eval arg('use_docker') or arg('docker_only') or arg('remote_only') or (arg('host') != 'localhost' and arg('host') != '127.0.0.1'))">
</node>

<!-- Docker-backed engine started through the run-voicevox-docker script; launched only when
     use_docker or docker_only is true. Note that cpu_num_threads is passed in option=value form. -->
<node name="voicevox_docker_server"
pkg="voicevox" type="run-voicevox-docker"
args="--host $(arg host) --port $(arg port) --cpu_num_threads=$(arg cpu_num_threads) --"
respawn="$(arg sound_play_respawn)"
output="screen"
if="$(eval arg('use_docker') or arg('docker_only'))">
</node>

<node if="$(arg launch_sound_play)"
<node if="$(eval arg('launch_sound_play') and not arg('docker_only'))"
name="sound_play_jp"
pkg="sound_play" type="soundplay_node.py"
respawn="$(arg sound_play_respawn)"
Expand Down
Loading