66
77import re
88import logging
9- from typing import Optional , Literal
9+ from typing import Optional , Literal , List
1010from elevenlabs .client import ElevenLabs
1111from bs4 import BeautifulSoup
1212
@@ -97,6 +97,61 @@ def extract_summary_text_for_tts(html_content: str) -> str:
9797 return text
9898
9999
100+ def _split_text_into_chunks (text : str , max_chars : int ) -> List [str ]:
101+ """
102+ Split text into chunks at sentence/paragraph boundaries, each under max_chars.
103+
104+ Splits preferably at paragraph breaks (double newline), then sentence ends
105+ (. ! ?). If a single sentence still exceeds max_chars, it is split at the
106+ last space before the limit.
107+
108+ Args:
109+ text: Text to split
110+ max_chars: Maximum characters per chunk
111+
112+ Returns:
113+ List of text chunks, each at most max_chars characters
114+ """
115+ if len (text ) <= max_chars :
116+ return [text ]
117+
118+ # Split on sentence-ending punctuation followed by spaces, or paragraph breaks.
119+ # The lookbehind keeps the punctuation attached to the preceding sentence.
120+ parts = re .split (r"(?<=[.!?]) +|\n\n" , text )
121+
122+ chunks : List [str ] = []
123+ current_chunk = ""
124+
125+ for part in parts :
126+ part = part .strip ()
127+ if not part :
128+ continue
129+
130+ candidate = (current_chunk + " " + part ).strip () if current_chunk else part
131+
132+ if len (candidate ) <= max_chars :
133+ current_chunk = candidate
134+ else :
135+ # Flush the accumulated chunk
136+ if current_chunk :
137+ chunks .append (current_chunk )
138+
139+ # If the part itself is too long, hard-split at word boundaries
140+ while len (part ) > max_chars :
141+ split_at = part .rfind (" " , 0 , max_chars )
142+ if split_at == - 1 :
143+ split_at = max_chars
144+ chunks .append (part [:split_at ])
145+ part = part [split_at :].lstrip ()
146+
147+ current_chunk = part
148+
149+ if current_chunk :
150+ chunks .append (current_chunk )
151+
152+ return chunks
153+
154+
100155def _generate_audio_openai (
101156 text : str ,
102157 voice : str ,
@@ -122,40 +177,44 @@ def _generate_audio_openai(
122177
123178 Note:
124179 OpenAI TTS has a 4096 character limit per request.
125- For longer text, this function will truncate.
180+ Longer text is split into chunks at sentence boundaries and the
181+ resulting audio bytes are concatenated into a single MP3 stream.
126182 """
127183 max_chars = 4096
184+ chunks = _split_text_into_chunks (text , max_chars )
128185
129- # Truncate if needed
130- if len (text ) > max_chars :
131- text = text [: max_chars - 3 ] + "..."
186+ if len (chunks ) > 1 :
187+ logger .info (
188+ f"Text split into { len (chunks )} chunks for OpenAI TTS ({ len (text )} chars total)"
189+ )
132190
133- try :
134- # Use tracked client for automatic usage tracking
135- client = get_tracked_openai_client ()
191+ client = get_tracked_openai_client ()
192+ audio_parts : List [bytes ] = []
136193
137- response = client .text_to_speech (
138- text = text ,
139- voice = voice ,
140- model = model ,
141- feature = feature ,
142- video_id = video_id ,
143- )
194+ for chunk in chunks :
195+ try :
196+ response = client .text_to_speech (
197+ text = chunk ,
198+ voice = voice ,
199+ model = model ,
200+ feature = feature ,
201+ video_id = video_id ,
202+ )
203+ audio_parts .append (response .read ())
144204
145- # Read the audio data from the response
146- return response . read ( )
205+ except Exception as e :
206+ error_msg = str ( e )
147207
148- except Exception as e :
149- error_msg = str (e )
150-
151- if "401" in error_msg or "unauthorized" in error_msg .lower ():
152- raise TTSAPIError ("Invalid OpenAI API key" )
153- elif "429" in error_msg or "rate" in error_msg .lower ():
154- raise TTSAPIError (f"Rate limited: { error_msg } " )
155- elif "insufficient_quota" in error_msg .lower ():
156- raise TTSAPIError (f"Insufficient quota: { error_msg } " )
157- else :
158- raise TTSAPIError (f"OpenAI TTS failed: { error_msg } " )
208+ if "401" in error_msg or "unauthorized" in error_msg .lower ():
209+ raise TTSAPIError ("Invalid OpenAI API key" )
210+ elif "429" in error_msg or "rate" in error_msg .lower ():
211+ raise TTSAPIError (f"Rate limited: { error_msg } " )
212+ elif "insufficient_quota" in error_msg .lower ():
213+ raise TTSAPIError (f"Insufficient quota: { error_msg } " )
214+ else :
215+ raise TTSAPIError (f"OpenAI TTS failed: { error_msg } " )
216+
217+ return b"" .join (audio_parts )
159218
160219
161220def _generate_audio_elevenlabs (
@@ -198,61 +257,66 @@ def _generate_audio_elevenlabs(
198257 "eleven_monolingual_v1" : 5000 ,
199258 }
200259 max_chars = model_limits .get (model_id , 10000 )
260+ chunks = _split_text_into_chunks (text , max_chars )
201261
202- # Truncate if needed
203- if len (text ) > max_chars :
204- text = text [: max_chars - 3 ] + "..."
205-
206- try :
207- client = ElevenLabs (api_key = api_key )
208-
209- audio_generator = client .text_to_speech .convert (
210- text = text ,
211- voice_id = voice_id ,
212- model_id = model_id ,
213- output_format = output_format ,
262+ if len (chunks ) > 1 :
263+ logger .info (
264+ f"Text split into { len (chunks )} chunks for ElevenLabs TTS ({ len (text )} chars total)"
214265 )
215266
216- audio_data = b"" .join (audio_generator )
267+ client = ElevenLabs (api_key = api_key )
268+ audio_parts : List [bytes ] = []
269+ total_chars = 0
217270
218- # Track usage - ElevenLabs TTS priced per character
271+ for chunk in chunks :
219272 try :
220- metadata = {
221- "character_count" : len (text ),
222- "voice_id" : voice_id ,
223- "output_format" : output_format ,
224- }
225-
226- log_llm_usage (
227- provider = "elevenlabs" ,
228- model = model_id ,
229- feature = feature ,
230- prompt_tokens = len (text ), # Store character count in prompt_tokens
231- response_tokens = 0 , # TTS doesn't have response tokens
232- video_id = video_id ,
233- metadata = metadata ,
234- )
235- logger .info (
236- f"ElevenLabs TTS { model_id } call tracked for { feature } ({ len (text )} chars)"
273+ audio_generator = client .text_to_speech .convert (
274+ text = chunk ,
275+ voice_id = voice_id ,
276+ model_id = model_id ,
277+ output_format = output_format ,
237278 )
238- except Exception as e :
239- logger .warning (f"Failed to track ElevenLabs TTS usage: { e } " )
240-
241- return audio_data
279+ audio_parts .append (b"" .join (audio_generator ))
280+ total_chars += len (chunk )
242281
282+ except Exception as e :
283+ error_msg = str (e )
284+
285+ if "quota_exceeded" in error_msg .lower ():
286+ raise TTSAPIError (f"Quota exceeded: { error_msg } " )
287+ elif "401" in error_msg or "unauthorized" in error_msg .lower ():
288+ raise TTSAPIError ("Invalid ElevenLabs API key" )
289+ elif "402" in error_msg or "payment_required" in error_msg .lower ():
290+ raise TTSAPIError (f"Payment required: { error_msg } " )
291+ elif "429" in error_msg or "rate" in error_msg .lower ():
292+ raise TTSAPIError (f"Rate limited: { error_msg } " )
293+ else :
294+ raise TTSAPIError (f"ElevenLabs TTS failed: { error_msg } " )
295+
296+ # Track usage - ElevenLabs TTS priced per character
297+ try :
298+ metadata = {
299+ "character_count" : total_chars ,
300+ "voice_id" : voice_id ,
301+ "output_format" : output_format ,
302+ }
303+
304+ log_llm_usage (
305+ provider = "elevenlabs" ,
306+ model = model_id ,
307+ feature = feature ,
308+ prompt_tokens = total_chars , # Store character count in prompt_tokens
309+ response_tokens = 0 , # TTS doesn't have response tokens
310+ video_id = video_id ,
311+ metadata = metadata ,
312+ )
313+ logger .info (
314+ f"ElevenLabs TTS { model_id } call tracked for { feature } ({ total_chars } chars)"
315+ )
243316 except Exception as e :
244- error_msg = str (e )
245-
246- if "quota_exceeded" in error_msg .lower ():
247- raise TTSAPIError (f"Quota exceeded: { error_msg } " )
248- elif "401" in error_msg or "unauthorized" in error_msg .lower ():
249- raise TTSAPIError ("Invalid ElevenLabs API key" )
250- elif "402" in error_msg or "payment_required" in error_msg .lower ():
251- raise TTSAPIError (f"Payment required: { error_msg } " )
252- elif "429" in error_msg or "rate" in error_msg .lower ():
253- raise TTSAPIError (f"Rate limited: { error_msg } " )
254- else :
255- raise TTSAPIError (f"ElevenLabs TTS failed: { error_msg } " )
317+ logger .warning (f"Failed to track ElevenLabs TTS usage: { e } " )
318+
319+ return b"" .join (audio_parts )
256320
257321
258322def generate_audio (
0 commit comments