Skip to content

Commit b14b4a2

Browse files
committed
Capture LLM cost of TTS
1 parent 9cfe256 commit b14b4a2

File tree

5 files changed

+129
-48
lines changed

5 files changed

+129
-48
lines changed

services/llm_clients.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,66 @@ def create_chat_completion(
167167
except Exception:
168168
raise
169169

170+
def text_to_speech(
    self,
    text: str,
    voice: str,
    feature: str,
    video_id: Optional[str] = None,
    metadata: Optional[Dict] = None,
    model: str = "tts-1",
) -> Any:
    """
    Generate speech from text with automatic usage tracking.

    Args:
        text: Text to convert to speech
        voice: Voice to use (alloy, echo, fable, onyx, nova, shimmer)
        feature: Feature name for tracking (e.g., "weekly_summary_tts")
        video_id: Associated video ID (optional)
        metadata: Additional metadata (optional); never mutated by this call
        model: TTS model to use (tts-1 or tts-1-hd)

    Returns:
        OpenAI response object with audio data

    Raises:
        Exception: Any error from the OpenAI API is re-raised to the caller.
    """
    try:
        # Make API call
        response = self.client.audio.speech.create(
            model=model,
            voice=voice,
            input=text,
            response_format="mp3",
        )

        # Track usage.
        # TTS is priced per character, not tokens, so the character count
        # is recorded in the prompt_tokens field.
        try:
            # Bug fix: copy the caller's metadata before adding tracking
            # keys. The original used `metadata or {}` and then mutated it,
            # which leaked character_count/voice/audio_format back into the
            # dict the caller passed in.
            tracking_metadata = dict(metadata) if metadata else {}
            tracking_metadata["character_count"] = len(text)
            tracking_metadata["voice"] = voice
            tracking_metadata["audio_format"] = "mp3"

            log_llm_usage(
                provider="openai",
                model=model,
                feature=feature,
                prompt_tokens=len(text),  # Store character count in prompt_tokens
                response_tokens=0,  # TTS doesn't have response tokens
                video_id=video_id,
                metadata=tracking_metadata,
            )
            logger.info(
                f"OpenAI TTS {model} call tracked for {feature} ({len(text)} chars)"
            )
        except Exception as e:
            # Tracking is best-effort: never fail the TTS call because
            # usage logging failed.
            logger.warning(f"Failed to track OpenAI TTS usage: {e}")

        return response

    except Exception:
        # Re-raise API errors unchanged; mirrors create_chat_completion's
        # error-handling style in this class.
        raise
229+
170230

171231
class TrackedGeminiClient:
172232
"""

services/tts.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
import re
88
from typing import Optional, Literal
99
from elevenlabs.client import ElevenLabs
10-
from openai import OpenAI
1110
from bs4 import BeautifulSoup
1211

1312
from services.path_utils import expand_path
13+
from services.llm_clients import get_tracked_openai_client
1414

1515
TTSProvider = Literal["openai", "elevenlabs"]
1616

@@ -94,16 +94,21 @@ def extract_summary_text_for_tts(html_content: str) -> str:
9494

9595

9696
def _generate_audio_openai(
97-
text: str, voice: str, api_key: str, model: str = "tts-1"
97+
text: str,
98+
voice: str,
99+
model: str = "tts-1",
100+
feature: str = "tts",
101+
video_id: Optional[str] = None,
98102
) -> bytes:
99103
"""
100-
Generate audio using OpenAI TTS API.
104+
Generate audio using OpenAI TTS API with usage tracking.
101105
102106
Args:
103107
text: Text to convert to speech
104108
voice: OpenAI voice (alloy, echo, fable, onyx, nova, shimmer)
105-
api_key: OpenAI API key
106109
model: Model to use (tts-1 or tts-1-hd)
110+
feature: Feature name for tracking (default: "tts")
111+
video_id: Associated video ID for tracking (optional)
107112
108113
Returns:
109114
MP3 audio data as bytes
@@ -122,10 +127,15 @@ def _generate_audio_openai(
122127
text = text[: max_chars - 3] + "..."
123128

124129
try:
125-
client = OpenAI(api_key=api_key)
130+
# Use tracked client for automatic usage tracking
131+
client = get_tracked_openai_client()
126132

127-
response = client.audio.speech.create(
128-
model=model, voice=voice, input=text, response_format="mp3"
133+
response = client.text_to_speech(
134+
text=text,
135+
voice=voice,
136+
model=model,
137+
feature=feature,
138+
video_id=video_id,
129139
)
130140

131141
# Read the audio data from the response
@@ -214,42 +224,50 @@ def _generate_audio_elevenlabs(
214224

215225
def generate_audio(
216226
text: str,
217-
api_key: str,
227+
api_key: Optional[str] = None,
218228
provider: TTSProvider = "openai",
219229
voice: Optional[str] = None,
220230
model: Optional[str] = None,
231+
feature: str = "tts",
232+
video_id: Optional[str] = None,
221233
) -> bytes:
222234
"""
223235
Generate audio from text using the specified TTS provider.
224236
225237
Args:
226238
text: Text to convert to speech
227-
api_key: API key for the provider
239+
api_key: API key for the provider (required for ElevenLabs, optional for OpenAI)
228240
provider: TTS provider to use ("openai" or "elevenlabs")
229241
voice: Voice ID/name (provider-specific)
230242
model: Model ID (provider-specific)
243+
feature: Feature name for usage tracking (default: "tts")
244+
video_id: Associated video ID for tracking (optional)
231245
232246
Returns:
233247
MP3 audio data as bytes
234248
235249
Raises:
236250
TTSAPIError: If API request fails
237-
ValueError: If provider is invalid
251+
ValueError: If provider is invalid or required parameters are missing
238252
239253
Provider-specific defaults:
240254
OpenAI:
241255
- voice: "alloy" (options: alloy, echo, fable, onyx, nova, shimmer)
242256
- model: "tts-1" (options: tts-1, tts-1-hd)
257+
- api_key: Read from config (automatic tracking)
243258
ElevenLabs:
244259
- voice: Must be provided
245260
- model: "eleven_flash_v2_5"
261+
- api_key: Required
246262
"""
247263
if provider == "openai":
248264
voice = voice or "alloy"
249265
model = model or "tts-1"
250-
return _generate_audio_openai(text, voice, api_key, model)
266+
return _generate_audio_openai(text, voice, model, feature, video_id)
251267

252268
elif provider == "elevenlabs":
269+
if not api_key:
270+
raise ValueError("ElevenLabs requires api_key parameter")
253271
if not voice:
254272
raise ValueError("ElevenLabs requires a voice_id")
255273
model = model or "eleven_flash_v2_5"

services/weekly_summary.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -614,23 +614,24 @@ def _generate_and_attach_tts(
614614
if config.tts_provider == "openai":
615615
if not config.openai_api_key:
616616
raise ValueError("OpenAI API key not configured")
617-
api_key = config.openai_api_key
618-
voice = config.openai_tts_voice
619-
model = config.openai_tts_model
617+
audio_data = generate_audio(
618+
text=tts_text,
619+
provider="openai",
620+
voice=config.openai_tts_voice,
621+
model=config.openai_tts_model,
622+
feature="weekly_summary_tts",
623+
)
620624
else: # elevenlabs
621625
if not config.elevenlabs_api_key:
622626
raise ValueError("ElevenLabs API key not configured")
623-
api_key = config.elevenlabs_api_key
624-
voice = config.elevenlabs_voice_id
625-
model = config.elevenlabs_model_id
626-
627-
audio_data = generate_audio(
628-
text=tts_text,
629-
api_key=api_key,
630-
provider=config.tts_provider,
631-
voice=voice,
632-
model=model,
633-
)
627+
audio_data = generate_audio(
628+
text=tts_text,
629+
api_key=config.elevenlabs_api_key,
630+
provider="elevenlabs",
631+
voice=config.elevenlabs_voice_id,
632+
model=config.elevenlabs_model_id,
633+
feature="weekly_summary_tts",
634+
)
634635
duration = save_audio_file(audio_data, audio_path)
635636
logger.info(f"Saved audio file: {audio_path} ({duration}s)")
636637

templates/stats.html

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,7 @@ <h1><i class="fas fa-chart-line"></i> LLM Usage Statistics</h1>
430430
<option value="transcription">Transcription</option>
431431
<option value="summarization">Summarization</option>
432432
<option value="weekly_summary">Weekly Summary</option>
433+
<option value="weekly_summary_tts">Weekly Summary TTS</option>
433434
<option value="book_suggestions">Book Suggestions</option>
434435
</select>
435436
</div>
@@ -540,11 +541,13 @@ <h2><i class="fas fa-table"></i> Detailed Breakdown</h2>
540541
let providerChart = null;
541542
let featureChart = null;
542543

543-
// Model pricing (per 1M tokens)
544+
// Model pricing (per 1M tokens/characters)
544545
const MODEL_PRICING = {
545546
// OpenAI models
546547
'whisper-1': { input: 0, output: 0 }, // Whisper is priced per minute, not tokens
547548
'voxtral-mini-latest': { input: 0, output: 0 }, // Voxtral is priced per minute, not tokens
549+
'tts-1': { input: 15.00, output: 0 }, // TTS priced per character (stored in prompt_tokens)
550+
'tts-1-hd': { input: 30.00, output: 0 }, // TTS HD priced per character (stored in prompt_tokens)
548551
'gpt-5-nano': { input: 0.05, output: 0.40 },
549552
'gpt-4o-mini': { input: 0.15, output: 0.60 },
550553
'gpt-4o': { input: 2.50, output: 10.00 },
@@ -832,7 +835,8 @@ <h2><i class="fas fa-table"></i> Detailed Breakdown</h2>
832835
'summarization': { icon: 'fa-file-alt', name: 'Summarization', iconClass: 'summarization' },
833836
'weekly_summary': { icon: 'fa-calendar-week', name: 'Weekly Summary', iconClass: 'weekly_summary' },
834837
'book_suggestions': { icon: 'fa-book', name: 'Book Suggestions', iconClass: 'summarization' },
835-
'tts': { icon: 'fa-volume-up', name: 'Text-to-Speech', iconClass: 'weekly_summary' }
838+
'tts': { icon: 'fa-volume-up', name: 'Text-to-Speech', iconClass: 'weekly_summary' },
839+
'weekly_summary_tts': { icon: 'fa-volume-up', name: 'Weekly Summary TTS', iconClass: 'weekly_summary' }
836840
};
837841

838842
sortedFeatures.forEach(([feature, cost]) => {

tests/services/test_tts.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -79,35 +79,35 @@ def test_case_insensitive_books_match(self):
7979
class TestGenerateAudio:
8080
"""Tests for generate_audio function."""
8181

82-
@patch("services.tts.OpenAI")
83-
def test_successful_generation_openai(self, mock_openai_class):
82+
@patch("services.tts.get_tracked_openai_client")
def test_successful_generation_openai(self, mock_get_client):
    """Should generate audio successfully with OpenAI provider."""
    # Arrange: tracked client whose text_to_speech yields fake audio bytes.
    fake_response = Mock()
    fake_response.read.return_value = b"fake audio data"
    fake_client = Mock()
    fake_client.text_to_speech.return_value = fake_response
    mock_get_client.return_value = fake_client

    # Act
    result = generate_audio(
        text="Hello world",
        provider="openai",
        voice="alloy",
        model="tts-1",
    )

    # Assert: audio bytes returned and exactly one tracked call was made.
    assert result == b"fake audio data"
    mock_get_client.assert_called_once()
    fake_client.text_to_speech.assert_called_once()

    # Assert: the tracked client received the expected keyword arguments.
    kwargs = fake_client.text_to_speech.call_args.kwargs
    expected = {
        "text": "Hello world",
        "voice": "alloy",
        "model": "tts-1",
        "feature": "tts",
    }
    for key, value in expected.items():
        assert kwargs[key] == value
111111

112112
@patch("services.tts.ElevenLabs")
113113
def test_successful_generation_elevenlabs(self, mock_elevenlabs_class):
@@ -141,29 +141,28 @@ def test_successful_generation_elevenlabs(self, mock_elevenlabs_class):
141141
assert call_kwargs["voice_id"] == "test-voice-id"
142142
assert call_kwargs["model_id"] == "eleven_flash_v2_5"
143143

144-
@patch("services.tts.OpenAI")
145-
def test_truncates_long_text_openai(self, mock_openai_class):
144+
@patch("services.tts.get_tracked_openai_client")
def test_truncates_long_text_openai(self, mock_get_client):
    """Should truncate text longer than OpenAI limit (4096 chars)."""
    # Arrange: tracked client returning a stub audio response.
    stub_response = Mock()
    stub_response.read.return_value = b"audio"
    stub_client = Mock()
    stub_client.text_to_speech.return_value = stub_response
    mock_get_client.return_value = stub_client

    # Act: request audio for text well past the 4096-char OpenAI limit.
    generate_audio(
        text="x" * 5000,
        provider="openai",
    )

    # Assert: the text actually sent was truncated to exactly 4096 chars,
    # with a trailing ellipsis marking the cut.
    sent_text = stub_client.text_to_speech.call_args.kwargs["text"]
    assert len(sent_text) == 4096
    assert sent_text.endswith("...")
169168

@@ -195,19 +194,18 @@ def test_truncates_long_text_elevenlabs(self, mock_elevenlabs_class):
195194
assert len(posted_text) == 40000
196195
assert posted_text.endswith("...")
197196

198-
@patch("services.tts.OpenAI")
199-
def test_raises_on_auth_error_openai(self, mock_openai_class):
197+
@patch("services.tts.get_tracked_openai_client")
def test_raises_on_auth_error_openai(self, mock_get_client):
    """Should raise TTSAPIError on OpenAI authentication error."""
    # Arrange: tracked client whose TTS call fails with an auth error.
    failing_client = Mock()
    failing_client.text_to_speech.side_effect = Exception("401 Unauthorized")
    mock_get_client.return_value = failing_client

    # Act / Assert: the failure surfaces as a TTSAPIError with a clear message.
    with pytest.raises(TTSAPIError, match="Invalid OpenAI API key"):
        generate_audio(text="Test", provider="openai")
0 commit comments

Comments
 (0)