|
1 | | -from typing import Optional, Tuple |
2 | | -from fastapi import FastAPI, Request, Depends |
| 1 | +from typing import Optional, Tuple, Literal |
| 2 | +from fastapi import FastAPI, Request, Depends, Response |
3 | 3 | from fastapi.responses import FileResponse |
| 4 | +from pydantic import BaseModel, Field |
4 | 5 | from ovos_plugin_manager.tts import load_tts_plugin |
5 | 6 | from ovos_config import Configuration |
6 | 7 |
|
class MaryTTSInput(BaseModel):
    """
    Request parameters accepted by the MaryTTS-compatible /process endpoint.

    Field names are upper-case on purpose: they are the parameter names a
    MaryTTS client sends. Only INPUT_TEXT is required; every other field
    falls back to the default declared below.
    """
    # Required: the utterance to be spoken.
    INPUT_TEXT: str = Field(default=..., description="The text to synthesize")
    # Kind of input; any value other than these two is rejected by validation.
    INPUT_TYPE: Literal["TEXT", "SSML"] = "TEXT"
    # Optional per-request overrides; None means "not supplied by the client".
    LOCALE: Optional[str] = Field(default=None, description="Target Locale (e.g. en_US)")
    VOICE: Optional[str] = Field(default=None, description="Target Voice name")
    # Accepted as free-form strings; no constraint is placed on their values here.
    OUTPUT_TYPE: str = "AUDIO"
    AUDIO: str = "WAVE_FILE"
| 19 | + |
7 | 20 |
|
8 | 21 | class TTSEngineWrapper: |
9 | 22 | """Wrapper around an OVOS TTS engine for dependency injection.""" |
@@ -34,6 +47,16 @@ def langs(self): |
34 | 47 | """ |
35 | 48 | return self.engine.available_languages or [self.lang] |
36 | 49 |
|
@property
def voices(self):
    """
    Return the voices advertised by the wrapped engine, if any.

    The value of the engine's ``available_voices`` attribute is passed
    through unchanged when it is present and non-empty; its element type
    (strings, dictionaries, ...) is up to the plugin.

    Returns:
        The engine's ``available_voices`` value, or an empty list when the
        attribute is missing, ``None`` or empty.
    """
    # `or []` covers plugins that define the attribute but leave it None,
    # so callers can always iterate the result (same guard style as `langs`).
    return getattr(self.engine, "available_voices", None) or []
| 59 | + |
37 | 60 | def synthesize(self, utterance: str, **kwargs) -> Tuple[str, Optional[str]]: |
38 | 61 | """ |
39 | 62 | Synthesize spoken audio from the given text or SSML. |
@@ -86,7 +109,54 @@ def status() -> dict: |
86 | 109 | "default_voice": config.get("voice") |
87 | 110 | } |
88 | 111 |
|
89 | | - # legacy OVOS endpoints |
| 112 | + # --- MaryTTS Compatibility Endpoints --- |
| 113 | + |
@app.get("/locales")
def mary_locales():
    """
    MaryTTS compatibility: list the supported locales as plain text.

    The body contains one entry of ``tts_engine.langs`` per line, joined
    with newline characters and without a trailing newline.
    """
    body = "\n".join(tts_engine.langs)
    # An explicit Response with text/plain is used so the body is sent as
    # plain text rather than JSON.
    return Response(content=body, media_type="text/plain")
| 123 | + |
@app.get("/voices")
def mary_voices():
    """
    MaryTTS compatibility: list the available voices as plain text.

    One voice per line, fields separated by single spaces. Only a single
    line is produced, carrying four fields: the literal name "default",
    the engine language, the literal "m", and the plugin name.
    Note: the voice name must not contain spaces.
    """
    # plugins don't report specific voices - TODO - add available_voices/models property to TTS plugins
    default_entry = f"default {tts_engine.lang} m {tts_engine.plugin_name}"
    return Response(content=default_entry, media_type="text/plain")
| 137 | + |
@app.api_route("/process", methods=["GET", "POST"])
def mary_process(params: MaryTTSInput = Depends()):
    """
    MaryTTS compatibility: synthesize INPUT_TEXT and return the audio file.

    Registered for both GET and POST. The request is validated against
    MaryTTSInput; only INPUT_TEXT, LOCALE and VOICE are read here, while
    INPUT_TYPE, OUTPUT_TYPE and AUDIO are validated but otherwise unused.
    The response is always labelled audio/wav.
    """
    # NOTE(review): with a bare Depends() the model fields are presumably
    # read from the query string - confirm POST form bodies are handled.
    overrides = {}

    locale = params.LOCALE
    if locale:
        overrides["lang"] = locale

    voice = params.VOICE
    if voice:
        # Underscores in the requested voice are turned back into spaces
        # before the name is handed to the engine.
        overrides["voice"] = voice.replace("_", " ")

    # The second element of the returned tuple is not needed for the reply.
    wav_path, _ = tts_engine.synthesize(params.INPUT_TEXT, **overrides)
    return FileResponse(wav_path, media_type="audio/wav")
| 157 | + |
| 158 | + # --- Legacy OVOS Endpoints --- |
| 159 | + |
90 | 160 | @app.get("/synthesize/{utterance}") |
91 | 161 | async def synth_legacy(utterance: str, request: Request) -> FileResponse: |
92 | 162 | """ |
@@ -114,7 +184,7 @@ async def synth_v2(request: Request) -> FileResponse: |
114 | 184 | """ |
115 | 185 | utterance = request.query_params.get("utterance") |
116 | 186 | if not utterance: |
117 | | - return {"error": "Missing 'utterance' query parameter"} |
| 187 | + return Response(content='{"error": "Missing utterance"}', status_code=400, media_type="application/json") |
118 | 188 |
|
119 | 189 | # Pass all plugin-specific options |
120 | 190 | plugin_params = dict(request.query_params) |
|
0 commit comments