diff --git a/.github/workflows/publish_stable.yml b/.github/workflows/publish_stable.yml index 3a42c59..c8ea019 100644 --- a/.github/workflows/publish_stable.yml +++ b/.github/workflows/publish_stable.yml @@ -26,7 +26,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v6 with: - python-version: "3.14" + python-version: "3.11" - name: Install Build Tools run: | python -m pip install build wheel diff --git a/.github/workflows/release_workflow.yml b/.github/workflows/release_workflow.yml index 3d661ac..dd46826 100644 --- a/.github/workflows/release_workflow.yml +++ b/.github/workflows/release_workflow.yml @@ -46,7 +46,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v6 with: - python-version: "3.14" + python-version: "3.11" - name: Install Build Tools run: | python -m pip install build wheel @@ -75,7 +75,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v6 with: - python-version: '3.14' + python-version: '3.11' - name: Get version from setup.py id: get_version diff --git a/ovos_tts_server/__init__.py b/ovos_tts_server/__init__.py index 03056c0..5861c2c 100644 --- a/ovos_tts_server/__init__.py +++ b/ovos_tts_server/__init__.py @@ -1,9 +1,21 @@ -from typing import Optional, Tuple - -from fastapi import FastAPI, Request, Response +from typing import Optional, Tuple, Literal +from fastapi import FastAPI, Request, Depends, Response from fastapi.responses import FileResponse -from ovos_config import Configuration +from pydantic import BaseModel, Field from ovos_plugin_manager.tts import load_tts_plugin +from ovos_config import Configuration + +class MaryTTSInput(BaseModel): + """ + Pydantic model for validating MaryTTS /process API requests. + Supports both standard MaryTTS params and basic defaults. + """ + INPUT_TEXT: str = Field(..., description="The text to synthesize") + INPUT_TYPE: Literal["TEXT", "SSML"] = "TEXT" + LOCALE: Optional[str] = Field(None, description="Target Locale (e.g. 
en_US)") + VOICE: Optional[str] = Field(None, description="Target Voice name") + OUTPUT_TYPE: str = "AUDIO" + AUDIO: str = "WAVE_FILE" class TTSEngineWrapper: @@ -28,23 +40,34 @@ def __init__(self, plugin_name: str, cache: bool = False): @property def langs(self): """ - Return the list of languages supported by the wrapped TTS engine. + List languages supported by the wrapped TTS engine. Returns: - list[str]: Engine-reported available language codes, or a list containing the wrapper's default language if the engine does not expose available languages. + list[str]: Engine-reported language codes, or a single-item list containing the wrapper's default language if the engine does not provide available languages. """ return self.engine.available_languages or [self.lang] + @property + def voices(self): + """ + Return the available voices exposed by the wrapped TTS engine. + + A list of available voices — each item is typically a dict or a string depending on the engine implementation. Returns an empty list if the wrapped engine does not expose an `available_voices` attribute. + """ + if hasattr(self.engine, "available_voices"): + return self.engine.available_voices + return [] + def synthesize(self, utterance: str, **kwargs) -> Tuple[str, Optional[str]]: """ - Synthesize spoken audio from the given text or SSML. + Synthesize speech audio from the provided text or SSML. Parameters: - utterance (str): Text or SSML to synthesize. - kwargs: Plugin-specific synthesis parameters forwarded to the underlying TTS engine. + utterance (str): Text or SSML to synthesize. + **kwargs: Plugin-specific synthesis options forwarded to the underlying TTS engine (e.g., lang, voice, format). Returns: - tuple (str, Optional[str]): `(audio_path, phonemes)` where `audio_path` is the file path to the generated audio and `phonemes` is the phoneme data produced by the engine, or `None` if not available. 
+ tuple: `(audio_path, phonemes)` where `audio_path` is the file path to the generated audio and `phonemes` is the phoneme data produced by the engine, or `None` if not available. """ utterance = self.engine.validate_ssml(utterance) audio, phonemes = self.engine.synth(utterance, **kwargs) @@ -87,17 +110,78 @@ def status() -> dict: "default_voice": config.get("voice") } - # legacy OVOS endpoints + # --- MaryTTS Compatibility Endpoints --- + + @app.get("/locales") + def mary_locales(): + """ + Provide supported locales in MaryTTS-compatible plain-text format. + + Returns: + A plain-text HTTP response whose body contains supported locale identifiers, + one per line (newline-separated). + """ + langs = tts_engine.langs + # Ensure we return plain text, not JSON + return Response(content="\n".join(langs), media_type="text/plain") + + @app.get("/voices") + def mary_voices(): + """ + Provide a MaryTTS-compatible plain-text listing of available voices. + + Each line has the format: "<name> <locale> <gender> <type>". The generated name must not contain spaces; the response currently emits a single default voice line based on the server's TTS engine. + + Returns: + A FastAPI Response with media_type "text/plain" whose body is newline-separated voice entries. + """ + lines = [] + + # plugins don't report specific voices - TODO - add available_voices/models property to TTS plugins + lines.append(f"default {tts_engine.lang} m {tts_engine.plugin_name}") + + return Response(content="\n".join(lines), media_type="text/plain") + + @app.api_route("/process", methods=["GET", "POST"]) + def mary_process(params: MaryTTSInput = Depends()): + """ + Handle MaryTTS-compatible /process requests and return the synthesized audio as a WAV FileResponse. + + Maps MaryTTS parameters to TTS engine options (LOCALE → lang, VOICE underscores replaced with spaces) and synthesizes INPUT_TEXT via the injected TTS engine. 
+ + Parameters: + params (MaryTTSInput): Validated MaryTTS request parameters injected via Depends(); contains INPUT_TEXT, INPUT_TYPE, LOCALE, VOICE, OUTPUT_TYPE, and AUDIO. + + Returns: + FileResponse: A response serving the synthesized audio as a WAV file. + """ + # Map MaryTTS specific params to OVOS synthesize params + synth_kwargs = {} + + if params.LOCALE: + synth_kwargs["lang"] = params.LOCALE + + if params.VOICE: + # Revert the space sanitization if the plugin needs real spaces + # (Though most OVOS plugins map by ID, strict names might differ) + synth_kwargs["voice"] = params.VOICE.replace("_", " ") + + audio_path, _ = tts_engine.synthesize(params.INPUT_TEXT, **synth_kwargs) + return FileResponse(audio_path, media_type="audio/wav") + + # --- Legacy OVOS Endpoints --- + @app.get("/synthesize/{utterance}") async def synth_legacy(utterance: str, request: Request) -> FileResponse: """ - Generate and return synthesized audio for the given utterance, forwarding query parameters to the TTS plugin. + Produce synthesized audio for an utterance using query parameters from the incoming request. Parameters: - request (Request): The incoming FastAPI request whose query parameters are forwarded to the TTS plugin. + utterance (str): Text to synthesize. + request (Request): Incoming FastAPI request whose query parameters are forwarded to the TTS plugin as synthesis options. Returns: - FileResponse: A response serving the synthesized audio file. + FileResponse: A response serving the generated audio file. """ audio_path, _ = tts_engine.synthesize(utterance, **request.query_params) return FileResponse(audio_path) @@ -128,15 +212,15 @@ async def synth_v2(request: Request) -> FileResponse: def start_tts_server(tts_plugin: str, cache: bool = False) -> Tuple[FastAPI, TTSEngineWrapper]: """ - Initialize TTS engine and create FastAPI app. - - Args: - tts_plugin: TTS plugin name to load. - cache: Whether to persist cached audio across reboots. 
- + Create and configure a FastAPI application wired to a TTSEngineWrapper for the specified plugin. + + Parameters: + tts_plugin (str): Name of the TTS plugin to load. + cache (bool): If True, persist synthesized audio cache across restarts. + Returns: - Tuple of FastAPI app and TTS engine wrapper. + Tuple[FastAPI, TTSEngineWrapper]: The configured FastAPI app and the initialized TTS engine wrapper. """ tts_engine = TTSEngineWrapper(plugin_name=tts_plugin, cache=cache) app = create_app(tts_engine) - return app, tts_engine + return app, tts_engine \ No newline at end of file diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 591ae36..9ab3e64 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -2,3 +2,4 @@ ovos-plugin-manager>=2.1.0,<3.0.0 fastapi~=0.115 uvicorn~=0.34 ovos-utils>=0.0.38 +pydantic \ No newline at end of file