Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/publish_stable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
python-version: "3.14"
python-version: "3.11"
- name: Install Build Tools
run: |
python -m pip install build wheel
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
python-version: "3.14"
python-version: "3.11"
- name: Install Build Tools
run: |
python -m pip install build wheel
Expand Down Expand Up @@ -75,7 +75,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v6
with:
python-version: '3.14'
python-version: '3.11'

- name: Get version from setup.py
id: get_version
Expand Down
128 changes: 106 additions & 22 deletions ovos_tts_server/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
from typing import Optional, Tuple

from fastapi import FastAPI, Request, Response
from typing import Optional, Tuple, Literal
from fastapi import FastAPI, Request, Depends, Response
from fastapi.responses import FileResponse
from ovos_config import Configuration
from pydantic import BaseModel, Field
from ovos_plugin_manager.tts import load_tts_plugin
from ovos_config import Configuration

class MaryTTSInput(BaseModel):
    """
    Request parameters accepted by the MaryTTS-style /process endpoint.

    Field names are upper-case because they are used verbatim as the
    request parameter names. Only INPUT_TEXT is required; every other
    field has a default.
    """
    # Required: the text to synthesize.
    INPUT_TEXT: str = Field(..., description="The text to synthesize")
    # Restricted to the two literal values below; anything else fails validation.
    INPUT_TYPE: Literal["TEXT", "SSML"] = "TEXT"
    # Optional overrides; None means "not supplied by the client".
    LOCALE: Optional[str] = Field(default=None, description="Target Locale (e.g. en_US)")
    VOICE: Optional[str] = Field(default=None, description="Target Voice name")
    # Accepted with fixed defaults; no validation beyond "is a string".
    OUTPUT_TYPE: str = "AUDIO"
    AUDIO: str = "WAVE_FILE"


class TTSEngineWrapper:
Expand All @@ -28,23 +40,34 @@ def __init__(self, plugin_name: str, cache: bool = False):
@property
def langs(self):
    """
    List the languages supported by the wrapped TTS engine.

    Returns:
        list[str]: The engine's `available_languages` when that value is truthy; otherwise a single-item list holding the wrapper's default language (`self.lang`).
    """
    reported = self.engine.available_languages
    if reported:
        return reported
    # Engine reported nothing usable: fall back to the wrapper default.
    return [self.lang]

@property
def voices(self):
    """
    Expose the voices advertised by the wrapped TTS engine.

    Returns:
        The engine's `available_voices` attribute when the engine has one (its item type is engine-defined), otherwise an empty list.
    """
    # Not every engine defines `available_voices`; default to "no voices".
    return getattr(self.engine, "available_voices", [])

def synthesize(self, utterance: str, **kwargs) -> Tuple[str, Optional[str]]:
"""
Synthesize spoken audio from the given text or SSML.
Synthesize speech audio from the provided text or SSML.

Parameters:
utterance (str): Text or SSML to synthesize.
kwargs: Plugin-specific synthesis parameters forwarded to the underlying TTS engine.
utterance (str): Text or SSML to synthesize.
**kwargs: Plugin-specific synthesis options forwarded to the underlying TTS engine (e.g., lang, voice, format).

Returns:
tuple (str, Optional[str]): `(audio_path, phonemes)` where `audio_path` is the file path to the generated audio and `phonemes` is the phoneme data produced by the engine, or `None` if not available.
tuple: `(audio_path, phonemes)` where `audio_path` is the file path to the generated audio and `phonemes` is the phoneme data produced by the engine, or `None` if not available.
"""
utterance = self.engine.validate_ssml(utterance)
audio, phonemes = self.engine.synth(utterance, **kwargs)
Expand Down Expand Up @@ -87,17 +110,78 @@ def status() -> dict:
"default_voice": config.get("voice")
}

# legacy OVOS endpoints
# --- MaryTTS Compatibility Endpoints ---

@app.get("/locales")
def mary_locales():
    """
    Serve the supported locales as MaryTTS-style plain text.

    Returns:
        Response: A "text/plain" response whose body lists the engine's
        language identifiers, one per line.
    """
    # Built as raw text on purpose: returning the list directly would be
    # serialized as JSON.
    body = "\n".join(tts_engine.langs)
    return Response(content=body, media_type="text/plain")

@app.get("/voices")
def mary_voices():
    """
    Provide a MaryTTS-compatible plain-text listing of available voices.

    Each line carries four space-separated fields:
    "<name> <locale> <gender> <plugin>". Only a single line is emitted for
    now: the name is the literal "default", the locale is the engine's
    default language, the gender is the fixed placeholder "m", and the last
    field is the loaded plugin name with any whitespace collapsed to
    underscores so the line stays space-delimited.

    Returns:
        Response: A "text/plain" response whose body is newline-separated voice entries.
    """
    # plugins don't report specific voices - TODO - add available_voices/models property to TTS plugins
    # NOTE(review): MaryTTS itself appears to report gender as "male"/"female";
    # "m" is kept unchanged for backward compatibility - confirm with clients.

    # Whitespace inside a field would shift the columns for any client that
    # splits each line on spaces, so replace it with underscores.
    plugin_field = "_".join(str(tts_engine.plugin_name).split())
    lines = [f"default {tts_engine.lang} m {plugin_field}"]

    return Response(content="\n".join(lines), media_type="text/plain")

@app.api_route("/process", methods=["GET", "POST"])
def mary_process(params: MaryTTSInput = Depends()):
    """
    Serve MaryTTS-style /process requests by synthesizing INPUT_TEXT.

    LOCALE is forwarded to the engine as `lang`; VOICE is forwarded as
    `voice` with underscores turned back into spaces. Parameters that are
    missing or empty are not forwarded at all.

    Parameters:
        params (MaryTTSInput): Validated request parameters injected via Depends().

    Returns:
        FileResponse: The synthesized audio file, served as "audio/wav".
    """
    # NOTE(review): Depends() on a Pydantic model reads these fields from the
    # query string; form-encoded POST bodies are presumably not parsed -
    # confirm against the MaryTTS clients this endpoint must support.
    options = {}

    locale = params.LOCALE
    if locale:
        options["lang"] = locale

    voice = params.VOICE
    if voice:
        # Voice names travel with "_" in place of spaces; restore the spaces
        # in case the plugin matches on the real name.
        options["voice"] = voice.replace("_", " ")

    wav_path, _phonemes = tts_engine.synthesize(params.INPUT_TEXT, **options)
    return FileResponse(wav_path, media_type="audio/wav")

# --- Legacy OVOS Endpoints ---

@app.get("/synthesize/{utterance}")
async def synth_legacy(utterance: str, request: Request) -> FileResponse:
    """
    Synthesize the utterance given in the URL path and return the audio file.

    Parameters:
        utterance (str): Text to synthesize.
        request (Request): Incoming request; its query parameters are forwarded to the TTS engine as keyword arguments.

    Returns:
        FileResponse: A response serving the generated audio file.
    """
    # Phoneme data is returned by the engine but not used by this endpoint.
    generated_file, _phonemes = tts_engine.synthesize(utterance, **request.query_params)
    return FileResponse(generated_file)
Expand Down Expand Up @@ -128,15 +212,15 @@ async def synth_v2(request: Request) -> FileResponse:

def start_tts_server(tts_plugin: str, cache: bool = False) -> Tuple[FastAPI, TTSEngineWrapper]:
    """
    Create a FastAPI application wired to a TTSEngineWrapper for the given plugin.

    Parameters:
        tts_plugin (str): Name of the TTS plugin to load.
        cache (bool): Forwarded to TTSEngineWrapper; if True, persist synthesized audio cache across restarts.

    Returns:
        Tuple[FastAPI, TTSEngineWrapper]: The configured FastAPI app and the initialized TTS engine wrapper.
    """
    tts_engine = TTSEngineWrapper(plugin_name=tts_plugin, cache=cache)
    app = create_app(tts_engine)
    return app, tts_engine
1 change: 1 addition & 0 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ ovos-plugin-manager>=2.1.0,<3.0.0
fastapi~=0.115
uvicorn~=0.34
ovos-utils>=0.0.38
pydantic