diff --git a/3rdparty/voicevox/CMakeLists.txt b/3rdparty/voicevox/CMakeLists.txt index c99823cf8..aae4c5b2b 100644 --- a/3rdparty/voicevox/CMakeLists.txt +++ b/3rdparty/voicevox/CMakeLists.txt @@ -94,13 +94,16 @@ endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/bin/run-voicevox.in ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox @ONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/bin/run-voicevox-docker.in + ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox-docker + @ONLY) catkin_install_python( PROGRAMS node_scripts/request_synthesis.py node_scripts/list_speakers.py DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts/) install( - PROGRAMS bin/text2wave ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox + PROGRAMS bin/text2wave ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox-docker DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/bin) install(DIRECTORY launch diff --git a/3rdparty/voicevox/README.md b/3rdparty/voicevox/README.md index 213387b5c..21d06374b 100644 --- a/3rdparty/voicevox/README.md +++ b/3rdparty/voicevox/README.md @@ -2,6 +2,34 @@ ROS Interface for [VOICEVOX](https://voicevox.hiroshiba.jp/) (AI speech synthesis) +## Quick Start + +Choose the deployment pattern that best fits your needs: + +```bash +# Pattern A: Direct Docker + ROS client (Multi-machine) | 直接Docker + ROSクライアント(マルチマシン) +# Server machine: +docker run --rm --gpus all -p '50021:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest +# Client machine: +roslaunch voicevox voicevox_texttospeech.launch host:=<server_ip> port:=50021 + +# Pattern B: ROS-managed Docker + ROS client (Multi-machine) | ROS管理Docker + ROSクライアント(マルチマシン) +# Server machine: +roslaunch voicevox voicevox_texttospeech.launch docker_only:=true +# Client machine: +roslaunch voicevox voicevox_texttospeech.launch host:=<server_ip> port:=50021 + +# Pattern C: 
All-in-one (Docker + ROS on same machine) | オールインワン(同一マシンでDocker+ROS) +roslaunch voicevox voicevox_texttospeech.launch use_docker:=true + +# Pattern D: Local VOICEVOX without Docker (CPU only) | ローカルVOICEVOX(Dockerなし、CPU版) +# VOICEVOX engine is automatically installed during catkin build +roslaunch voicevox voicevox_texttospeech.launch +``` + +Patterns A-C provide GPU acceleration support through Docker. Pattern D runs locally with CPU only. +パターンA-CはDockerを通じたGPUによる高速な音声合成をサポート。パターンDはCPUのみでローカル実行。 + ## TERM [VOICEVOX](https://voicevox.hiroshiba.jp/) is basically free to use, but please check the terms of use below. @@ -55,6 +83,74 @@ cd /path/to/catkin_workspace catkin build voicevox ``` +### Optional (Using Docker with GPU acceleration) + +VOICEVOX supports Docker deployment with GPU acceleration, which enables significantly faster speech synthesis compared to CPU-only processing. + +#### Prerequisites + +First, install the NVIDIA Container Toolkit: + +https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html + +#### Setup + +Pull the GPU-enabled VOICEVOX Docker image: + +```bash +docker pull voicevox/voicevox_engine:nvidia-ubuntu20.04-latest +``` + +Start the Docker container with GPU support: + +```bash +docker run --rm --gpus all -p '50021:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest +``` + +#### Usage with use_docker option + +Launch the ROS node with Docker backend using the `use_docker:=true` option: + +```bash +# For local Docker container +roslaunch voicevox voicevox_texttospeech.launch use_docker:=true + +# For remote Docker container +roslaunch voicevox voicevox_texttospeech.launch use_docker:=true host:=<server_ip> +``` + +#### Docker-only mode + +If you want to launch only the Docker container without the ROS sound_play node, use the `docker_only:=true` option: + +```bash +# Launch Docker container only (local) +roslaunch voicevox voicevox_texttospeech.launch docker_only:=true + +# Launch Docker container only with 
custom port +roslaunch voicevox voicevox_texttospeech.launch docker_only:=true port:=50022 +``` + +#### Connect to existing Docker container + +To connect to an existing Docker container running on a different machine: + +```bash +# Connect to remote Docker container +roslaunch voicevox voicevox_texttospeech.launch host:=<server_ip> port:=<port> + +# Example: Connect to Docker container at 192.168.1.100:50021 +roslaunch voicevox voicevox_texttospeech.launch host:=192.168.1.100 port:=50021 +``` + +The `use_docker` and `docker_only` options allow you to: +- **use_docker**: Run Docker container and ROS node together +- **docker_only**: Launch only the Docker container (useful for server deployment) +- Utilize GPU acceleration for faster speech synthesis +- Run VOICEVOX engine in an isolated container environment +- Easily deploy on different machines without local installation + + ## Usage ### Launch sound_play with VOICEVOX Text-to-Speech 
diff --git a/3rdparty/voicevox/bin/run-voicevox-docker.in b/3rdparty/voicevox/bin/run-voicevox-docker.in
new file mode 100755
index 000000000..784656697
--- /dev/null
+++ b/3rdparty/voicevox/bin/run-voicevox-docker.in
@@ -0,0 +1,118 @@
+#!/bin/bash
+#
+# Run or monitor a VOICEVOX engine (Docker backend).
+#
+# Usage: run-voicevox-docker [--host HOST] [--port PORT] [--cpu_num_threads N] [-- ...]
+#
+#   --host HOST           Engine host (default: localhost). For localhost or
+#                         127.0.0.1 a local container is started; for any other
+#                         host the script only polls the remote engine.
+#   --port PORT           Host port of the engine (default: 50021).
+#   --cpu_num_threads N   Value exported to the container as CPU_NUM_THREADS
+#                         (default: 1).
+#
+# Unknown options are ignored and parsing stops at the first "--".
+# Exit status: 2 on a usage error, 1 when a remote engine stops responding.
+
+# CPU image used for the local container; the same tag is pulled and run.
+# NOTE(review): the README advertises GPU acceleration for the launch-managed
+# container, but this is the CPU image and no --gpus flag is passed - confirm
+# which one is intended.
+image="voicevox/voicevox_engine:cpu-ubuntu20.04-latest"
+
+host="localhost"
+port="50021"
+cpu_num_threads="1"
+
+# Abort with a usage error when option $1 has no value after it.
+# $1: option name, $2: number of arguments left (including the option itself).
+require_value() {
+    if [[ "$2" -lt 2 ]]; then
+        echo "Error: option $1 requires a value" >&2
+        exit 2
+    fi
+}
+
+# Succeed when the engine answers HTTP requests at $host:$port.
+engine_is_up() {
+    curl -s --connect-timeout 5 "http://$host:$port/docs" > /dev/null 2>&1
+}
+
+# Parse arguments. "shift 2" without a value would fail and leave the
+# arguments untouched (an endless loop), hence the require_value guard.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --host)
+            require_value "$1" "$#"
+            host="$2"
+            shift 2
+            ;;
+        --port)
+            require_value "$1" "$#"
+            port="$2"
+            shift 2
+            ;;
+        --cpu_num_threads)
+            require_value "$1" "$#"
+            cpu_num_threads="$2"
+            shift 2
+            ;;
+        --)
+            shift
+            break
+            ;;
+        *)
+            shift
+            ;;
+    esac
+done
+
+if [[ "$host" == "localhost" || "$host" == "127.0.0.1" ]]; then
+    # Local mode: run the engine in a container named after the port.
+    container_name="voicevox-engine-${port}"
+
+    # Match the whole name so that e.g. port 5002 does not match 50021.
+    if docker ps --format '{{.Names}}' | grep -Fxq -- "$container_name"; then
+        echo "VOICEVOX Docker container is already running on port $port"
+        # Stay in the foreground by following the running container's logs.
+        docker logs -f "$container_name"
+        exit 0
+    fi
+
+    # Remove any existing stopped container with the same name.
+    docker rm -f "$container_name" 2>/dev/null || true
+
+    # Pull exactly the tag that is run below if it is not available locally.
+    if ! docker image inspect "$image" > /dev/null 2>&1; then
+        echo "Pulling VOICEVOX Docker image..."
+        docker pull "$image"
+    fi
+
+    echo "Starting VOICEVOX Docker container on port $port with $cpu_num_threads CPU threads..."
+    # NOTE(review): confirm that the engine image reads CPU_NUM_THREADS.
+    exec docker run --rm \
+        --name "$container_name" \
+        -p "$port:50021" \
+        -e CPU_NUM_THREADS="$cpu_num_threads" \
+        "$image"
+else
+    # Remote mode: nothing is started here, the script only waits and monitors.
+    echo "Connecting to remote VOICEVOX Docker container at $host:$port"
+    echo "Make sure the Docker container is running on the remote host with:"
+    echo "docker run --rm --gpus all -p '$port:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest"
+    echo ""
+    echo "Waiting for connection to $host:$port..."
+
+    until engine_is_up; do
+        echo "Waiting for VOICEVOX engine at $host:$port..."
+        sleep 5
+    done
+    echo "Successfully connected to VOICEVOX engine at $host:$port"
+
+    # Keep the script running; re-check the engine every 30 seconds.
+    echo "VOICEVOX engine is ready at $host:$port"
+    while true; do
+        sleep 30
+        if ! engine_is_up; then
+            echo "Lost connection to VOICEVOX engine at $host:$port"
+            exit 1
+        fi
+    done
+fi
diff --git a/3rdparty/voicevox/launch/voicevox_texttospeech.launch b/3rdparty/voicevox/launch/voicevox_texttospeech.launch index c264ed183..b34883e5f 100644 --- a/3rdparty/voicevox/launch/voicevox_texttospeech.launch +++ b/3rdparty/voicevox/launch/voicevox_texttospeech.launch @@ -10,15 +10,30 @@ + + + + output="screen" + unless="$(eval arg('use_docker') or arg('docker_only') or arg('remote_only') or (arg('host') != 'localhost' and arg('host') != '127.0.0.1'))"> + + + -