feat: MaryTTS API compatibility endpoints

JarbasAl · JarbasAl · commit 7ce4021a37b1 · 2026-01-26T20:10:29.000Z
diff --git a/ovos_tts_server/__init__.py b/ovos_tts_server/__init__.py
@@ -1,9 +1,22 @@
-from typing import Optional, Tuple
-from fastapi import FastAPI, Request, Depends
+from typing import Optional, Tuple, Literal
+from fastapi import FastAPI, Request, Depends, Response
 from fastapi.responses import FileResponse
+from pydantic import BaseModel, Field
 from ovos_plugin_manager.tts import load_tts_plugin
 from ovos_config import Configuration
 
+class MaryTTSInput(BaseModel):
+    """
+    Pydantic model for validating MaryTTS /process API requests.
+    Only INPUT_TEXT is required; every other field has a default value.
+    """
+    INPUT_TEXT: str = Field(..., description="The text to synthesize")
+    INPUT_TYPE: Literal["TEXT", "SSML"] = "TEXT"  # validated, but the /process handler in this change does not read it
+    LOCALE: Optional[str] = Field(None, description="Target Locale (e.g. en_US)")
+    VOICE: Optional[str] = Field(None, description="Target Voice name")
+    OUTPUT_TYPE: str = "AUDIO"  # accepted for compatibility; not read by the /process handler
+    AUDIO: str = "WAVE_FILE"  # accepted for compatibility; the handler always responds with audio/wav
+
 
 class TTSEngineWrapper:
     """Wrapper around an OVOS TTS engine for dependency injection."""
@@ -34,6 +47,16 @@ def langs(self):
         """
         return self.engine.available_languages or [self.lang]
 
+    @property
+    def voices(self):
+        """
+        Voices advertised by the wrapped plugin, if it exposes any.
+        The value of the engine's `available_voices` attribute is returned
+        untouched; an empty list is returned when the attribute is missing.
+        """
+        # One lookup with a fallback instead of a hasattr check plus a second access.
+        return getattr(self.engine, "available_voices", [])
+
     def synthesize(self, utterance: str, **kwargs) -> Tuple[str, Optional[str]]:
         """
         Synthesize spoken audio from the given text or SSML.
@@ -86,7 +109,54 @@ def status() -> dict:
             "default_voice": config.get("voice")
         }
 
-    # legacy OVOS endpoints
+    # --- MaryTTS Compatibility Endpoints ---
+
+    @app.get("/locales")
+    def mary_locales():
+        """
+        MaryTTS Compatibility: Returns a newline-separated list of supported locales.
+        Format: one locale per line, served as plain text.
+        """
+        # Build the body by hand so the response is plain text rather than JSON.
+        body = "\n".join(tts_engine.langs)
+        return Response(content=body, media_type="text/plain")
+
+    @app.get("/voices")
+    def mary_voices():
+        """
+        MaryTTS Compatibility: Returns a list of supported voices.
+        Format: one "[name] [locale] [gender] [plugin_name]" entry per line, as plain text.
+        Note: Name must be space-free.
+        """
+        # plugins don't report specific voices - TODO - add available_voices/models property to TTS plugins
+        # Until then a single placeholder voice is advertised: the gender field is a
+        # hard-coded "m" and the plugin name is emitted as the fourth field.
+        lines = [f"default {tts_engine.lang} m {tts_engine.plugin_name}"]
+
+        return Response(content="\n".join(lines), media_type="text/plain")
+
+    @app.api_route("/process", methods=["GET", "POST"])
+    def mary_process(params: MaryTTSInput = Depends()):
+        """
+        MaryTTS Compatibility: Processes input text and returns a wav file.
+        Registered for both GET and POST; parameters are validated by Pydantic.
+        """
+        # NOTE(review): with Depends() on the model the fields are presumably read
+        # from the query string for POST as well - confirm form bodies are not needed.
+        locale, voice = params.LOCALE, params.VOICE
+        # Translate the MaryTTS parameter names into OVOS synthesize keyword arguments;
+        # empty or missing values are simply not forwarded.
+        synth_kwargs = {}
+        if locale:
+            synth_kwargs["lang"] = locale
+        if voice:
+            # Underscores are turned back into spaces for plugins that use spaced names
+            synth_kwargs["voice"] = voice.replace("_", " ")
+        audio_path, _ = tts_engine.synthesize(params.INPUT_TEXT, **synth_kwargs)
+        return FileResponse(audio_path, media_type="audio/wav")
+
+    # --- Legacy OVOS Endpoints ---
+
     @app.get("/synthesize/{utterance}")
     async def synth_legacy(utterance: str, request: Request) -> FileResponse:
         """
@@ -114,7 +184,7 @@ async def synth_v2(request: Request) -> FileResponse:
         """
         utterance = request.query_params.get("utterance")
         if not utterance:
-            return {"error": "Missing 'utterance' query parameter"}
+            return Response(content='{"error": "Missing utterance"}', status_code=400, media_type="application/json")
 
         # Pass all plugin-specific options
         plugin_params = dict(request.query_params)
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -2,3 +2,4 @@ ovos-plugin-manager>=2.1.0,<3.0.0
 fastapi~=0.115
 uvicorn~=0.34
 ovos-utils>=0.0.38
+pydantic