Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions 3rdparty/voicevox3/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Build configuration for the voicevox3 catkin package.
cmake_minimum_required(VERSION 2.8.3)
project(voicevox3)

# catkin_virtualenv provides catkin_generate_virtualenv(), used below.
find_package(catkin REQUIRED COMPONENTS catkin_virtualenv)

catkin_python_setup()

catkin_package()

# Create the package's Python virtualenv from requirements.txt with python3.
catkin_generate_virtualenv(
  INPUT_REQUIREMENTS requirements.txt
  PYTHON_INTERPRETER python3
  USE_SYSTEM_PACKAGES FALSE
  CHECK_VENV FALSE  # Default TRUE
)

# Third-party VOICEVOX components fetched at build time.
# NOTE(review): every project below writes into ${PROJECT_SOURCE_DIR} (the
# source tree); the install(DIRECTORY voicevox_engine voicevox_core ...) rule
# in this file and bin/run both rely on those locations.
include(ExternalProject)
# VOICEVOX engine sources, checked out to <package source>/voicevox_engine.
# Nothing is compiled: the build step deletes a previous speaker_info.orig and
# the install step renames the engine's speaker_info to speaker_info.orig,
# presumably so that voicevox_resource below can generate a fresh speaker_info.
ExternalProject_Add(voicevox_engine
GIT_REPOSITORY https://github.com/VOICEVOX/voicevox_engine
GIT_TAG 0.14.7 # latest version before Python 3.11, Oct 5, 2023
CONFIGURE_COMMAND ""
BUILD_COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_SOURCE_DIR}/voicevox_engine/speaker_info.orig
INSTALL_COMMAND ${CMAKE_COMMAND} -E rename ${PROJECT_SOURCE_DIR}/voicevox_engine/speaker_info ${PROJECT_SOURCE_DIR}/voicevox_engine/speaker_info.orig
SOURCE_DIR ${PROJECT_SOURCE_DIR}/voicevox_engine
)
# VOICEVOX resources: the install step runs the repository's
# scripts/clean_character_info.py with voicevox_engine/speaker_info as its
# output directory. DEPENDS orders this after the voicevox_engine project.
# NOTE(review): GIT_TAG main follows a moving branch, so builds are not
# reproducible - consider pinning a commit or tag.
ExternalProject_Add(voicevox_resource
GIT_REPOSITORY https://github.com/VOICEVOX/voicevox_resource
GIT_TAG main
GIT_PROGRESS TRUE
GIT_SHALLOW TRUE
BUILD_IN_SOURCE TRUE
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND python3 ./scripts/clean_character_info.py --output_dir ${PROJECT_SOURCE_DIR}/voicevox_engine/speaker_info
DEPENDS voicevox_engine
)
# Alternative core release kept for reference (disabled):
#set(CORE_VERSION 0.15.7) ## Dec 29, 2024
#set(CORE_HASH "444dc362d98e065b8581e5a9e403b8fc")
# Core release in use and the MD5 checksum of its archive.
set(CORE_VERSION 0.14.6) ## Jan 11, 2024
set(CORE_HASH "26719dab23a8e0b4559516d1f2a78833")
# Prebuilt voicevox_core release archive (linux-x64, CPU build, per the file
# name), downloaded, MD5-checked and unpacked to <package source>/voicevox_core.
# No configure, build or install step is run.
ExternalProject_Add(voicevox_core
URL https://github.com/VOICEVOX/voicevox_core/releases/download/${CORE_VERSION}/voicevox_core-linux-x64-cpu-${CORE_VERSION}.zip
URL_HASH MD5=${CORE_HASH}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
SOURCE_DIR ${PROJECT_SOURCE_DIR}/voicevox_core
)

# ---- Install rules ----

# Python entry points of the package.
catkin_install_python(
  PROGRAMS
    node_scripts/request_synthesis.py
    node_scripts/list_speakers.py
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts/
)

# Shell wrappers. bin/run is the executable started by
# launch/voicevox_texttospeech.launch (pkg="voicevox3" type="run"), so it has
# to be installed together with text2wave; otherwise the server node cannot be
# found when the package is used from an install space.
install(
  PROGRAMS
    bin/run
    bin/text2wave
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/bin
)

install(
  DIRECTORY launch
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
  USE_SOURCE_PERMISSIONS
)

# Engine and core trees populated in the source directory by the
# ExternalProject targets of this file.
install(
  DIRECTORY
    voicevox_engine
    voicevox_core
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
  USE_SOURCE_PERMISSIONS
)
16 changes: 16 additions & 0 deletions 3rdparty/voicevox3/bin/run
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
#
# Start the VOICEVOX engine (voicevox_engine/run.py) with the python3 found
# through `rosrun voicevox3`.
#
# Only the arguments that precede the first "--" are forwarded to run.py;
# everything from "--" onwards is dropped. The launch file of this package
# ends its args with "--", presumably so that the extra arguments roslaunch
# appends to node command lines never reach the engine.

# Collect the arguments up to (not including) the first "--".
engine_args=()
for arg in "$@"; do
    if [[ "$arg" == -- ]]; then
        break
    fi
    engine_args+=("$arg")
done

# Quote both expansions so that paths or argument values containing spaces or
# glob characters are passed through as single, unmodified words.
exec rosrun voicevox3 python3 "$(rospack find voicevox3)/voicevox_engine/run.py" "${engine_args[@]}"
3 changes: 3 additions & 0 deletions 3rdparty/voicevox3/bin/text2wave
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
#
# Forward every argument unchanged to this package's request_synthesis.py.
# The launch file of this package puts this directory first on the PATH of the
# sound_play node, presumably so that this script is picked up as "text2wave".

# "$@" must be quoted: unquoted, arguments such as file paths containing
# spaces would be re-split and glob-expanded before reaching the script.
exec rosrun voicevox3 request_synthesis.py "$@"
31 changes: 31 additions & 0 deletions 3rdparty/voicevox3/launch/voicevox_texttospeech.launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<launch>
<!-- Starts the VOICEVOX engine server and, optionally, a sound_play node
     configured to use this package's bin directory and default speaker. -->

<!-- NOTE(review): "device" is declared but not referenced anywhere in this file. -->
<arg name="device" default="" />
<arg name="launch_sound_play" default="true" />
<arg name="sound_play_respawn" default="true"
doc="Respawn sound_play node or not (default: true)" />
<arg name="default_speaker" default="2"
doc="Default speaker for VOICEVOX" />
<arg name="cpu_num_threads" default="1"
doc="Number of cpu threads" />

<!-- Engine server, started through the package's "run" executable.
     The args string deliberately ends with a double dash: bin/run forwards
     only the arguments that come before it to the engine.
     Respawn of this node is also controlled by sound_play_respawn. -->
<node name="voicevox_server"
pkg="voicevox3" type="run"
args="--voicelib_dir=$(find voicevox3)/voicevox_core --cpu_num_threads=$(arg cpu_num_threads) --load_all_models --"
respawn="$(arg sound_play_respawn)"
output="screen" >
</node>

<!-- sound_play node; both "robotsound" and "sound_play" are remapped to
     "robotsound_jp". The package's bin directory is prepended to PATH so the
     text2wave script shipped there is found first, and the default speaker is
     passed both as the default_voice parameter and through the
     VOICEVOX_DEFAULT_SPEAKER_ID environment variable. -->
<node if="$(arg launch_sound_play)"
name="sound_play_jp"
pkg="sound_play" type="soundplay_node.py"
respawn="$(arg sound_play_respawn)"
output="screen" >
<remap from="robotsound" to="robotsound_jp"/>
<remap from="sound_play" to="robotsound_jp"/>
<param name="default_voice" value="$(arg default_speaker)" />
<env name="VOICEVOX_DEFAULT_SPEAKER_ID" value="$(arg default_speaker)" />
<env name="PATH" value="$(find voicevox3)/bin:$(env PATH)" />
<env name="PYTHONIOENCODING" value="utf-8" />
</node>
</launch>
25 changes: 25 additions & 0 deletions 3rdparty/voicevox3/node_scripts/list_speakers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from voicevox import Client
import asyncio


async def main():
    """Print what the VOICEVOX engine reports about itself.

    Lists, in order: the core versions, the engine version, the entries of the
    ``/supported_devices`` response, and every speaker together with its
    styles.
    """
    async with Client() as client:
        # check core
        core_versions = await client.fetch_core_versions()
        for core_version in core_versions:
            print("Core version: {}".format(core_version))

        # check engine
        print("Engine version: {}".format(
            await client.fetch_engine_version()))

        # check device
        devices = await client.http.request("GET", "/supported_devices")
        for device in devices:
            print("Device: {}".format(device))

        # check speaker
        speakers = await client.fetch_speakers()
        for speaker in speakers:
            features = speaker.supported_features
            print(speaker.uuid, speaker.name,
                  features.permitted_synthesis_morphing)
            for style in speaker.styles:
                print(style.id, speaker.name, style.name)


if __name__ == "__main__":
    asyncio.run(main())


124 changes: 124 additions & 0 deletions 3rdparty/voicevox3/node_scripts/request_synthesis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-

from voicevox import Client
import asyncio

import argparse
import os
import shutil
import sys

#import requests

# from voicevox.filecheck_utils import checksum_md5
# from voicevox.filecheck_utils import get_cache_dir


# Speaker id (as a string) -> "<character>-<style>" display name.
speaker_id_to_name = {
    '0': '四国めたん-あまあま',
    '1': 'ずんだもん-あまあま',
    '2': '四国めたん-ノーマル',
    '3': 'ずんだもん-ノーマル',
    '4': '四国めたん-セクシー',
    '5': 'ずんだもん-セクシー',
    '6': '四国めたん-ツンツン',
    '7': 'ずんだもん-ツンツン',
    '8': '春日部つむぎ-ノーマル',
    '9': '波音リツ-ノーマル',
    '10': '雨晴はう-ノーマル',
    '11': '玄野武宏-ノーマル',
    '12': '白上虎太郎-ノーマル',
    '13': '青山龍星-ノーマル',
    '14': '冥鳴ひまり-ノーマル',
    '15': '九州そら-あまあま',
    '16': '九州そら-ノーマル',
    '17': '九州そら-セクシー',
    '18': '九州そら-ツンツン',
    '19': '九州そら-ささやき',
}

# Reverse lookup: display name -> speaker id.
name_to_speaker_id = {
    display_name: speaker_id
    for speaker_id, display_name in speaker_id_to_name.items()
}


# Default speaker, read from VOICEVOX_DEFAULT_SPEAKER_ID ('2' when unset).
# A non-numeric value is treated as a voice name and looked up in
# name_to_speaker_id.
DEFAULT_SPEAKER_ID = os.environ.get('VOICEVOX_DEFAULT_SPEAKER_ID', '2')
if not DEFAULT_SPEAKER_ID.isdigit():
    DEFAULT_SPEAKER_ID = name_to_speaker_id[DEFAULT_SPEAKER_ID]

# Engine host and port settings.
VOICEVOX_TEXTTOSPEECH_URL = os.environ.get(
    'VOICEVOX_TEXTTOSPEECH_URL', 'localhost')
VOICEVOX_TEXTTOSPEECH_PORT = os.environ.get(
    'VOICEVOX_TEXTTOSPEECH_PORT', 50021)

# Cache flag: True when the variable is unset or set to the string 'true'
# (the form a launch-file env tag provides).
cache_enabled = os.environ.get(
    'ROS_VOICEVOX_TEXTTOSPEECH_CACHE_ENABLED', True)
cache_enabled = (cache_enabled is True
                 or cache_enabled == 'true')


def determine_voice_name(voice_name):
    """Resolve a requested voice into a speaker id string.

    Args:
        voice_name: Requested voice as a string. It may be empty (use the
            default speaker), a numeric speaker id, or a prefix of a name
            listed in ``name_to_speaker_id``.

    Returns:
        The speaker id as a string. ``DEFAULT_SPEAKER_ID`` is returned when
        the request is empty, is a numeric id missing from
        ``speaker_id_to_name``, or matches no known name.
    """
    speaker_id = None
    if len(voice_name) == 0:
        speaker_id = DEFAULT_SPEAKER_ID
    elif voice_name.isdigit():
        if voice_name in speaker_id_to_name:
            speaker_id = voice_name
        else:
            # Report the rejected input, not the name of the fallback voice.
            print('[Text2Wave] Invalid speaker_id ({}). Use default voice.'
                  .format(voice_name))
    else:
        # The first known name starting with the requested text wins.
        candidates = [name for name in name_to_speaker_id
                      if name.startswith(voice_name)]
        if candidates:
            speaker_id = name_to_speaker_id[candidates[0]]
        else:
            print('[Text2Wave] Invalid voice_name ({}). Use default voice.'
                  .format(voice_name))

    if speaker_id is None:
        speaker_id = DEFAULT_SPEAKER_ID

    # The default id may be numeric yet absent from the table (it is only
    # checked with isdigit()); fall back to printing the raw id instead of
    # raising KeyError.
    print('[Text2Wave] Speak using voice_name ({})..'.format(
        speaker_id_to_name.get(speaker_id, speaker_id)))
    return speaker_id


def convert_to_str(x):
    """Return ``x`` as ``str``.

    ``str`` input is returned unchanged and ``bytes`` input is decoded as
    UTF-8. Any other type raises ``ValueError``.
    """
    if isinstance(x, str):
        return x
    if isinstance(x, bytes):
        return x.decode('utf-8')
    raise ValueError('Invalid input x type: {}'.format(type(x)))

async def request_synthesis(
        sentence, output_path, speaker_id='1'):
    """Synthesize ``sentence`` with the VOICEVOX engine and save the audio.

    Args:
        sentence: Text to speak.
        output_path: Path of the file the synthesized audio bytes are
            written to (opened in binary write mode).
        speaker_id: Speaker id passed to both the audio-query and the
            synthesis requests.
    """
    async with Client() as client:
        audio_query = await client.create_audio_query(
            sentence, speaker=speaker_id)
        print(audio_query)
        # Synthesize before touching the output file: opening it first would
        # truncate/create it even when the request fails, leaving an empty
        # file behind at output_path.
        wave_data = await audio_query.synthesis(speaker=speaker_id)
    with open(output_path, "wb") as f:
        f.write(wave_data)

if __name__ == '__main__':
    # Command line: [-eval "(VOICE)"] -o OUTPUT TEXT_FILE
    # VOICE is a speaker id or a voice name, optionally wrapped in
    # parentheses; TEXT_FILE is a file whose first line is the text to speak.
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-eval', '--evaluate')
    # Without an output path there is nowhere to write the audio; fail at
    # argument parsing instead of later inside open().
    parser.add_argument('-o', '--output', required=True)
    parser.add_argument('text')
    args = parser.parse_args()

    with open(args.text, 'rb') as f:
        speech_text = convert_to_str(f.readline())

    print('args')
    print(args)
    # -eval may be omitted, in which case args.evaluate is None.
    requested_voice = (args.evaluate or '').lstrip('(').rstrip(')')
    if requested_voice.isdigit():
        # Numeric ids are passed to the engine untouched, so ids that are not
        # listed in speaker_id_to_name keep working.
        speaker_id = requested_voice
    else:
        # Empty -> DEFAULT_SPEAKER_ID, voice name -> matching speaker id.
        speaker_id = determine_voice_name(requested_voice)
    print('id')
    print(speaker_id)

    asyncio.run(request_synthesis(speech_text,
                                  args.output,
                                  speaker_id))
24 changes: 24 additions & 0 deletions 3rdparty/voicevox3/package.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0"?>
<?xml-model
  href="http://download.ros.org/schema/package_format3.xsd"
  schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
  <name>voicevox3</name>
  <version>2.1.24</version>
  <description>VOICEVOX: AI speech synthesis</description>
  <maintainer email="yanokura@jsk.imi.i.u-tokyo.ac.jp">Iori Yanokura</maintainer>

  <license>MIT</license>

  <url type="website">http://ros.org/wiki/voicevox</url>

  <author>Iori Yanokura</author>

  <buildtool_depend>catkin</buildtool_depend>
  <build_depend>catkin_virtualenv</build_depend>

  <!-- launch/voicevox_texttospeech.launch starts soundplay_node.py from sound_play -->
  <exec_depend>sound_play</exec_depend>

  <export>
    <!-- Python requirements consumed by catkin_virtualenv -->
    <pip_requirements>requirements.txt</pip_requirements>
  </export>

</package>
36 changes: 36 additions & 0 deletions 3rdparty/voicevox3/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
aiofiles==0.7.0 ; python_version >= "3.8" and python_version < "3.9"
anyio==3.6.2 ; python_version >= "3.8" and python_version < "3.9"
appdirs==1.4.4 ; python_version >= "3.8" and python_version < "3.9"
asgiref==3.6.0 ; python_version >= "3.8" and python_version < "3.9"
certifi==2022.12.7 ; python_version >= "3.8" and python_version < "3.9"
cffi==1.15.1 ; python_version >= "3.8" and python_version < "3.9"
charset-normalizer==2.1.1 ; python_version >= "3.8" and python_version < "3.9"
click==8.0.4 ; python_version >= "3.8" and python_version < "3.9"
colorama==0.4.4 ; python_version >= "3.8" and python_version < "3.9" and platform_system == "Windows"
cython==0.29.24 ; python_version >= "3.8" and python_version < "3.9"
fastapi==0.70.0 ; python_version >= "3.8" and python_version < "3.9"
h11==0.14.0 ; python_version >= "3.8" and python_version < "3.9"
idna==3.4 ; python_version >= "3.8" and python_version < "3.9"
jinja2==3.1.2 ; python_version >= "3.8" and python_version < "3.9"
markupsafe==2.1.1 ; python_version >= "3.8" and python_version < "3.9"
numpy==1.20.0 ; python_version >= "3.8" and python_version < "3.9"
pycparser==2.21 ; python_version >= "3.8" and python_version < "3.9"
pydantic==1.10.2 ; python_version >= "3.8" and python_version < "3.9"
# pyopenjtalk is pinned to the commit of Mar 30, 2025 (https://github.com/VOICEVOX/pyopenjtalk/pull/22, use cmake<4.0.0)
pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@5b70b94f3460ece07ea183227db088ce8d5212a6
## pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk.git@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.8" and python_version < "3.9"
python-multipart==0.0.5 ; python_version >= "3.8" and python_version < "3.9"
pyworld==0.3.0 ; python_version >= "3.8" and python_version < "3.9"
pyyaml==6.0 ; python_version >= "3.8" and python_version < "3.9"
requests==2.28.1 ; python_version >= "3.8" and python_version < "3.9"
scipy==1.7.1 ; python_version >= "3.8" and python_version < "3.9"
six==1.16.0 ; python_version >= "3.8" and python_version < "3.9"
sniffio==1.3.0 ; python_version >= "3.8" and python_version < "3.9"
soundfile==0.10.3.post1 ; python_version >= "3.8" and python_version < "3.9"
starlette==0.16.0 ; python_version >= "3.8" and python_version < "3.9"
tqdm==4.64.1 ; python_version >= "3.8" and python_version < "3.9"
typing-extensions==4.4.0 ; python_version >= "3.8" and python_version < "3.9"
urllib3==1.26.13 ; python_version >= "3.8" and python_version < "3.9"
uvicorn==0.15.0 ; python_version >= "3.8" and python_version < "3.9"
## Client library used by node_scripts/*.py (presumably provides the `voicevox` module they import)
voicevox-client==v0.1.5
12 changes: 12 additions & 0 deletions 3rdparty/voicevox3/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# catkin setup script, processed through catkin_python_setup() in CMakeLists.txt.
# setuptools' setup() is used instead of distutils.core.setup: distutils is
# deprecated (removed in Python 3.12) and the ROS Noetic migration guide asks
# packages to switch to setuptools.
from catkin_pkg.python_setup import generate_distutils_setup
from setuptools import find_packages
from setuptools import setup


# Package metadata comes from package.xml; Python packages are discovered
# under the "python" directory.
d = generate_distutils_setup(
    packages=find_packages('python'),
    package_dir={'': 'python'},
)

setup(**d)
Loading