Enhance VOICEVOX Docker support with flexible deployment options

iory · iory · commit 3efaf422ddb8 · 2025-09-25T19:53:43.000+09:00
- Add docker_only option for launching Docker container without ROS sound_play node
- Add remote connection support with automatic local/remote detection
- Improve README with detailed usage examples for different deployment scenarios
- Fix host binding issues to prevent port conflicts when connecting to remote containers
- Support GPU acceleration with proper documentation

Features:
- docker_only: Launch only Docker container (useful for server deployment)
- Automatic local/remote detection based on host parameter
- Enhanced run-voicevox-docker script with remote connection handling
- Comprehensive documentation for all usage patterns
diff --git a/3rdparty/voicevox/CMakeLists.txt b/3rdparty/voicevox/CMakeLists.txt
@@ -91,13 +91,20 @@ endif()
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/bin/run-voicevox.in
   ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox
   @ONLY)
+# Generate the Docker launcher into the devel space, next to run-voicevox.
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/bin/run-voicevox-docker.in
+  ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox-docker
+  @ONLY)
 
 
 catkin_install_python(
   PROGRAMS node_scripts/request_synthesis.py node_scripts/list_speakers.py
   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts/)
 install(
-  PROGRAMS bin/text2wave ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox
+  PROGRAMS
+    bin/text2wave
+    ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox
+    ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/bin/run-voicevox-docker
   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/bin)
 
 install(DIRECTORY launch
diff --git a/3rdparty/voicevox/README.md b/3rdparty/voicevox/README.md
@@ -2,6 +2,34 @@
 
 ROS Interface for [VOICEVOX](https://voicevox.hiroshiba.jp/) (AI speech synthesis)
 
+## Quick Start
+
+Choose the deployment pattern that best fits your needs:
+
+```bash
+# Pattern A: Direct Docker + ROS client (Multi-machine) | 直接Docker + ROSクライアント（マルチマシン）
+# Server machine:
+docker run --rm --gpus all -p '50021:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
+# Client machine:
+roslaunch voicevox voicevox_texttospeech.launch host:=<server_ip> port:=50021
+
+# Pattern B: ROS-managed Docker + ROS client (Multi-machine) | ROS管理Docker + ROSクライアント（マルチマシン）
+# Server machine:
+roslaunch voicevox voicevox_texttospeech.launch docker_only:=true
+# Client machine:
+roslaunch voicevox voicevox_texttospeech.launch host:=<server_ip> port:=50021
+
+# Pattern C: All-in-one (Docker + ROS on same machine) | オールインワン（同一マシンでDocker+ROS）
+roslaunch voicevox voicevox_texttospeech.launch use_docker:=true
+
+# Pattern D: Local VOICEVOX without Docker (CPU only) | ローカルVOICEVOX（Dockerなし、CPU版）
+# VOICEVOX engine is automatically installed during catkin build
+roslaunch voicevox voicevox_texttospeech.launch
+```
+
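+Pattern A runs the GPU (NVIDIA) image directly. Patterns B and C start the container through `run-voicevox-docker`, which runs the CPU image (`cpu-ubuntu20.04-latest`). Pattern D runs locally with CPU only.
+パターンAはGPU（NVIDIA）イメージを直接起動します。パターンB・Cは `run-voicevox-docker` 経由でコンテナを起動し、CPUイメージ（`cpu-ubuntu20.04-latest`）を使用します。パターンDはCPUのみでローカル実行します。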
+
 ## TERM
 
 [VOICEVOX](https://voicevox.hiroshiba.jp/) is basically free to use, but please check the terms of use below.
@@ -55,29 +83,73 @@ cd /path/to/catkin_workspace
 catkin build voicevox
 ```
 
-### Optional (Using docker with GPU)
+### Optional (Using Docker with GPU acceleration)
+
+VOICEVOX supports Docker deployment with GPU acceleration, which enables significantly faster speech synthesis compared to CPU-only processing.
+
+#### Prerequisites
 
-First, install the NVIDIA Container Toolkit.
+First, install the NVIDIA Container Toolkit:
 
 https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
 
-After that,
+#### Setup
 
-```
+Pull the GPU-enabled VOICEVOX Docker image:
+
+```bash
 docker pull voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
 ```
 
+Start the Docker container with GPU support:
 
-You should start Docker as follows.
-
-```
+```bash
 docker run --rm --gpus all -p '50021:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest
 ```
 
-```
+#### Usage with use_docker option
+
+Launch the ROS node with Docker backend using the `use_docker:=true` option:
+
+```bash
+# For local Docker container
+roslaunch voicevox voicevox_texttospeech.launch use_docker:=true
+
+# For remote Docker container
 roslaunch voicevox voicevox_texttospeech.launch use_docker:=true host:=<Docker PC IP>
 ```
 
+#### Docker-only mode
+
+If you want to launch only the Docker container without the ROS sound_play node, use the `docker_only:=true` option:
+
+```bash
+# Launch Docker container only (local)
+roslaunch voicevox voicevox_texttospeech.launch docker_only:=true
+
+# Launch Docker container only with custom port
+roslaunch voicevox voicevox_texttospeech.launch docker_only:=true port:=50022
+```
+
+#### Connect to existing Docker container
+
+To connect to an existing Docker container running on a different machine:
+
+```bash
+# Connect to remote Docker container
+roslaunch voicevox voicevox_texttospeech.launch host:=<Docker PC IP> port:=<Docker Port>
+
+# Example: Connect to Docker container at 192.168.1.100:50021
+roslaunch voicevox voicevox_texttospeech.launch host:=192.168.1.100 port:=50021
+```
+
+The `use_docker` and `docker_only` options select what is launched; the benefits listed after them apply to both:
+- **use_docker**: Run Docker container and ROS node together
+- **docker_only**: Launch only the Docker container (useful for server deployment)
+- Benefit: GPU acceleration for faster speech synthesis (when the container runs a GPU image)
+- Benefit: the VOICEVOX engine runs in an isolated container environment
+- Benefit: easy deployment on different machines without local installation
+
 
 ## Usage
 
diff --git a/3rdparty/voicevox/bin/run-voicevox-docker.in b/3rdparty/voicevox/bin/run-voicevox-docker.in
@@ -0,0 +1,140 @@
+#!/bin/bash
+#
+# Launch helper for the VOICEVOX engine Docker container.
+#
+# Usage:
+#   run-voicevox-docker [--host HOST] [--port PORT] [--cpu_num_threads N] [--]
+#
+# Each option is accepted as "--opt VALUE" and as "--opt=VALUE" (the launch
+# file passes --cpu_num_threads=N). Unknown arguments and everything after
+# "--" are ignored.
+#
+# HOST localhost/127.0.0.1: start a local CPU engine container published on
+# PORT. Any other HOST: start nothing, only poll the remote engine and exit
+# with status 1 once it stops answering.
+
+# Image used for the local container; the same tag is pulled when missing.
+image="voicevox/voicevox_engine:cpu-ubuntu20.04-latest"
+
+# Defaults, overridable from the command line
+host="localhost"
+port="50021"
+cpu_num_threads="1"
+
+# Guard for the "--opt VALUE" form. $1 = option name, $2 = number of arguments left.
+# Without it a trailing option makes "shift 2" fail without consuming
+# anything, so the parsing loop would spin forever.
+require_value() {
+  if [[ "$2" -lt 2 ]]; then
+    echo "Missing value for option $1" >&2
+    exit 2
+  fi
+}
+
+# Parse arguments
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --host=*)
+      host="${1#*=}"
+      shift
+      ;;
+    --host)
+      require_value "$1" "$#"
+      host="$2"
+      shift 2
+      ;;
+    --port=*)
+      port="${1#*=}"
+      shift
+      ;;
+    --port)
+      require_value "$1" "$#"
+      port="$2"
+      shift 2
+      ;;
+    --cpu_num_threads=*)
+      cpu_num_threads="${1#*=}"
+      shift
+      ;;
+    --cpu_num_threads)
+      require_value "$1" "$#"
+      cpu_num_threads="$2"
+      shift 2
+      ;;
+    --)
+      shift
+      break
+      ;;
+    *)
+      shift
+      ;;
+  esac
+done
+
+# Succeeds while the engine answers HTTP requests on $host:$port.
+engine_reachable() {
+  curl -s --connect-timeout 5 "http://$host:$port/docs" > /dev/null 2>&1
+}
+
+# Check if host is localhost (start local container) or remote (just wait)
+if [[ "$host" == "localhost" || "$host" == "127.0.0.1" ]]; then
+  # Local Docker container mode
+  container_name="voicevox-engine-${port}"
+
+  # Compare whole names: a substring match would let "voicevox-engine-5002"
+  # be mistaken for a running "voicevox-engine-50021".
+  if docker ps --format '{{.Names}}' | grep -qxF -- "$container_name"; then
+    echo "VOICEVOX Docker container is already running on port $port"
+    # Follow the logs so this process lives as long as the container
+    docker logs -f "$container_name"
+    exit 0
+  fi
+
+  # Remove any existing stopped container with the same name
+  docker rm -f "$container_name" 2>/dev/null || true
+
+  # Pull exactly the tag that is run below if it is not available locally
+  if ! docker image inspect "$image" > /dev/null 2>&1; then
+    echo "Pulling VOICEVOX Docker image..."
+    if ! docker pull "$image"; then
+      echo "Failed to pull $image" >&2
+      exit 1
+    fi
+  fi
+
+  # Run VOICEVOX Docker container
+  echo "Starting VOICEVOX Docker container on port $port with $cpu_num_threads CPU threads..."
+  # NOTE(review): VOICEVOX ENGINE documents VV_CPU_NUM_THREADS as the fallback
+  # for its cpu_num_threads option - confirm for the image version in use.
+  # CPU_NUM_THREADS is still exported so existing setups see no change.
+  exec docker run --rm \
+    --name "$container_name" \
+    -p "$port:50021" \
+    -e CPU_NUM_THREADS="$cpu_num_threads" \
+    -e VV_CPU_NUM_THREADS="$cpu_num_threads" \
+    "$image"
+else
+  # Remote Docker container mode - just wait for connection
+  echo "Connecting to remote VOICEVOX Docker container at $host:$port"
+  echo "Make sure the Docker container is running on the remote host with:"
+  echo "docker run --rm --gpus all -p '$port:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest"
+  echo ""
+  echo "Waiting for connection to $host:$port..."
+
+  # Wait for the remote service to be available
+  until engine_reachable; do
+    echo "Waiting for VOICEVOX engine at $host:$port..."
+    sleep 5
+  done
+  echo "Successfully connected to VOICEVOX engine at $host:$port"
+
+  # Keep the script running; exit when the engine stops answering
+  echo "VOICEVOX engine is ready at $host:$port"
+  while true; do
+    sleep 30
+    if ! engine_reachable; then
+      echo "Lost connection to VOICEVOX engine at $host:$port"
+      exit 1
+    fi
+  done
+fi
diff --git a/3rdparty/voicevox/launch/voicevox_texttospeech.launch b/3rdparty/voicevox/launch/voicevox_texttospeech.launch
@@ -12,16 +12,28 @@
        doc="Number of cpu threads" />
   <arg name="use_docker" default="false"
        doc="Use docker or not (default: false)" />
+  <arg name="docker_only" default="false"
+       doc="Launch only docker container without sound_play node (default: false)" />
+  <arg name="remote_only" default="false"
+       doc="Connect to remote server without starting local server (default: false)" />
 
   <node name="voicevox_server"
         pkg="voicevox" type="run-voicevox"
-        args="--voicelib_dir=$(find voicevox)/voicevox_core --host $(arg host) --port $(arg port) --cpu_num_threads=$(arg cpu_num_threads) --load_all_models --"
+        args="--voicelib_dir=$(find voicevox)/voicevox_core --host 0.0.0.0 --port $(arg port) --cpu_num_threads=$(arg cpu_num_threads) --load_all_models --"
         respawn="$(arg sound_play_respawn)"
         output="screen"
-        unless="$(arg use_docker)">
+        unless="$(eval arg('use_docker') or arg('docker_only') or arg('remote_only') or (arg('host') != 'localhost' and arg('host') != '127.0.0.1'))">
   </node>
 
-  <node if="$(arg launch_sound_play)"
+  <!-- Docker-backed engine: started when use_docker or docker_only is set; receives host, port and cpu_num_threads. -->
+  <node if="$(eval arg('use_docker') or arg('docker_only'))"
+        name="voicevox_docker_server"
+        pkg="voicevox" type="run-voicevox-docker"
+        args="--host $(arg host) --port $(arg port) --cpu_num_threads=$(arg cpu_num_threads) --"
+        respawn="$(arg sound_play_respawn)" output="screen">
+  </node>
+
+  <node if="$(eval arg('launch_sound_play') and not arg('docker_only'))"
         name="sound_play_jp"
         pkg="sound_play" type="soundplay_node.py"
         respawn="$(arg sound_play_respawn)"