55from dotenv import load_dotenv
66import os
77
8+ from handle_text import prepare_tts_input_with_context
89from tts_handler import generate_speech , get_models , get_voices
9- from utils import require_api_key , AUDIO_FORMAT_MIME_TYPES
10+ from utils import getenv_bool , require_api_key , AUDIO_FORMAT_MIME_TYPES
1011
1112app = Flask (__name__ )
1213load_dotenv ()
# Fallback audio format and playback speed; both can be overridden through
# environment variables of the same name.
DEFAULT_RESPONSE_FORMAT = os.environ.get('DEFAULT_RESPONSE_FORMAT', 'mp3')
DEFAULT_SPEED = float(os.environ.get('DEFAULT_SPEED', 1.2))

# Feature switches read from the environment via getenv_bool:
#   REMOVE_FILTER - when true, handlers skip prepare_tts_input_with_context().
#   EXPAND_API    - when false, the ElevenLabs/Azure-style routes refuse requests.
REMOVE_FILTER = getenv_bool('REMOVE_FILTER', False)
EXPAND_API = getenv_bool('EXPAND_API', True)
24+
2125# DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'tts-1')
2226
2327@app .route ('/v1/audio/speech' , methods = ['POST' ])
28+ @app .route ('/audio/speech' , methods = ['POST' ]) # Add this line for the alias
2429@require_api_key
2530def text_to_speech ():
2631 data = request .json
2732 if not data or 'input' not in data :
2833 return jsonify ({"error" : "Missing 'input' in request body" }), 400
2934
3035 text = data .get ('input' )
36+
37+ if not REMOVE_FILTER :
38+ text = prepare_tts_input_with_context (text )
39+
3140 # model = data.get('model', DEFAULT_MODEL)
3241 voice = data .get ('voice' , DEFAULT_VOICE )
3342
@@ -43,11 +52,13 @@ def text_to_speech():
4352 return send_file (output_file_path , mimetype = mime_type , as_attachment = True , download_name = f"speech.{ response_format } " )
4453
@app.route('/v1/models', methods=['GET', 'POST'])
@app.route('/models', methods=['GET', 'POST'])
@require_api_key
def list_models():
    """Return the result of ``get_models()`` as JSON under the ``data`` key.

    Registered on both ``/v1/models`` and the un-versioned ``/models`` alias.
    """
    available_models = get_models()
    response_body = {"data": available_models}
    return jsonify(response_body)
4959
5060@app .route ('/v1/voices' , methods = ['GET' , 'POST' ])
61+ @app .route ('/voices' , methods = ['GET' , 'POST' ])
5162@require_api_key
5263def list_voices ():
5364 specific_language = None
@@ -59,10 +70,91 @@ def list_voices():
5970 return jsonify ({"voices" : get_voices (specific_language )})
6071
@app.route('/v1/voices/all', methods=['GET', 'POST'])
@app.route('/voices/all', methods=['GET', 'POST'])
@require_api_key
def list_all_voices():
    """Return ``get_voices('all')`` as JSON under the ``voices`` key.

    Registered on both ``/v1/voices/all`` and the un-versioned ``/voices/all``
    alias.
    """
    every_voice = get_voices('all')
    response_body = {"voices": every_voice}
    return jsonify(response_body)
6577
78+ """
79+ Support for ElevenLabs and Azure AI Speech
80+ (currently in beta)
81+ """
82+
83+ # http://localhost:5050/elevenlabs/v1/text-to-speech
84+ # http://localhost:5050/elevenlabs/v1/text-to-speech/en-US-AndrewNeural
@app.route('/elevenlabs/v1/text-to-speech/<voice_id>', methods=['POST'])
@require_api_key
def elevenlabs_tts(voice_id):
    """ElevenLabs-style text-to-speech endpoint.

    The voice comes from the ``voice_id`` URL segment and the text from the
    ``text`` field of the JSON request body. Audio is always produced as MP3
    at ``DEFAULT_SPEED``.

    Args:
        voice_id: Voice name taken from the URL path and passed unchanged to
            ``generate_speech``.

    Returns:
        The generated file as an ``audio/mpeg`` attachment on success.
        Otherwise a JSON ``{"error": ...}`` body with status 400 (unusable
        request body) or 500 (endpoint disabled, or speech generation failed).
    """
    if not EXPAND_API:
        # NOTE(review): 500 is kept for backward compatibility; 403/404 would
        # describe a deliberately disabled endpoint more accurately.
        return jsonify({"error": "Endpoint not allowed"}), 500

    # Only the JSON parsing itself is guarded; Flask raises here for a
    # malformed body or an unsupported content type.
    try:
        payload = request.json
    except Exception as e:
        return jsonify({"error": f"Invalid JSON payload: {str(e)}"}), 400

    # Valid JSON that is not an object (e.g. a list or bare string) would pass
    # a plain membership test and then fail on payload['text'], so require a
    # dict explicitly.
    if not isinstance(payload, dict) or 'text' not in payload:
        return jsonify({"error": "Missing 'text' in request body"}), 400

    text = payload['text']
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "'text' must be a non-empty string"}), 400

    if not REMOVE_FILTER:
        text = prepare_tts_input_with_context(text)

    # ElevenLabs carries the voice in the URL rather than in the body.
    voice = voice_id

    # Fixed output settings for this compatibility endpoint.
    response_format = 'mp3'
    speed = DEFAULT_SPEED

    try:
        output_file_path = generate_speech(text, voice, response_format, speed)
    except Exception as e:
        return jsonify({"error": f"TTS generation failed: {str(e)}"}), 500

    return send_file(
        output_file_path,
        mimetype="audio/mpeg",
        as_attachment=True,
        download_name="speech.mp3",
    )
118+
119+ # tts.speech.microsoft.com/cognitiveservices/v1
120+ # https://{region}.tts.speech.microsoft.com/cognitiveservices/v1
121+ # http://localhost:5050/azure/cognitiveservices/v1
@app.route('/azure/cognitiveservices/v1', methods=['POST'])
@require_api_key
def azure_tts():
    """Azure-Speech-style text-to-speech endpoint that accepts an SSML body.

    The first ``<voice>`` element in the SSML namespace supplies both the
    voice (its ``name`` attribute) and the text to speak (all text inside the
    element, including text nested in child elements). Audio is always
    produced as MP3 at ``DEFAULT_SPEED``.

    Returns:
        The generated file as an ``audio/mpeg`` attachment on success.
        Otherwise a JSON ``{"error": ...}`` body with status 400 (missing or
        invalid SSML) or 500 (endpoint disabled, or speech generation failed).
    """
    if not EXPAND_API:
        # NOTE(review): 500 is kept for backward compatibility; 403/404 would
        # describe a deliberately disabled endpoint more accurately.
        return jsonify({"error": "Endpoint not allowed"}), 500

    from xml.etree import ElementTree as ET

    ssml_voice_tag = '{http://www.w3.org/2001/10/synthesis}voice'

    try:
        ssml_data = request.data.decode('utf-8')
        if not ssml_data:
            return jsonify({"error": "Missing SSML payload"}), 400

        # SECURITY NOTE(review): xml.etree.ElementTree is not hardened against
        # maliciously constructed XML (e.g. entity-expansion attacks). This
        # body is client-supplied; consider a hardened parser if the endpoint
        # is exposed to untrusted callers.
        root = ET.fromstring(ssml_data)
    except (ET.ParseError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError from the decode above.
        return jsonify({"error": f"Invalid SSML payload: {str(e)}"}), 400

    voice_element = root.find(f'.//{ssml_voice_tag}')
    if voice_element is None:
        return jsonify({"error": "Invalid SSML payload: no <voice> element found"}), 400

    voice = voice_element.get('name')

    # Element.text only holds the text before the first child element, so
    # markup such as <voice><prosody>Hi</prosody></voice> would yield None.
    # itertext() collects the text of the element and all its descendants.
    text = ''.join(voice_element.itertext())
    if not text.strip():
        return jsonify({"error": "Invalid SSML payload: <voice> element contains no text"}), 400

    # Fixed output settings for this compatibility endpoint.
    response_format = 'mp3'
    speed = DEFAULT_SPEED

    if not REMOVE_FILTER:
        text = prepare_tts_input_with_context(text)

    try:
        output_file_path = generate_speech(text, voice, response_format, speed)
    except Exception as e:
        return jsonify({"error": f"TTS generation failed: {str(e)}"}), 500

    return send_file(
        output_file_path,
        mimetype="audio/mpeg",
        as_attachment=True,
        download_name="speech.mp3",
    )
157+
# Startup banner, printed when the module is loaded.
print(" Edge TTS (Free Azure TTS) Replacement for OpenAI's TTS API")
print(" ")
print(" * Serving OpenAI Edge TTS")
0 commit comments