Skip to content

Commit 18e046c

Browse files
committed
Include TTS (ElevenLabs) usage in the LLM cost tracking
1 parent b14b4a2 commit 18e046c

File tree

7 files changed

+99
-15
lines changed

7 files changed

+99
-15
lines changed

config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,9 +247,14 @@ def validate(self) -> None:
247247
errors.append(
248248
"GEMINI_API_KEY is required when TRANSCRIPTION_PROVIDER=gemini"
249249
)
250+
elif self.transcription_provider == "mistral":
251+
if not self.mistral_api_key:
252+
errors.append(
253+
"MISTRAL_API_KEY is required when TRANSCRIPTION_PROVIDER=mistral"
254+
)
250255
else:
251256
errors.append(
252-
f"Invalid TRANSCRIPTION_PROVIDER: {self.transcription_provider}. Must be 'openai' or 'gemini'"
257+
f"Invalid TRANSCRIPTION_PROVIDER: {self.transcription_provider}. Must be 'openai', 'gemini', or 'mistral'"
253258
)
254259

255260
# Check summarization provider configuration

routes/transcription.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
Transcription routes.
33
"""
44

5-
import os
65
import logging
76
from fastapi import APIRouter, HTTPException
87
from fastapi.responses import JSONResponse
@@ -12,6 +11,7 @@
1211
TranscriptionJob,
1312
JobStatus,
1413
)
14+
from services.path_utils import expand_path
1515

1616
logger = logging.getLogger(__name__)
1717
router = APIRouter()
@@ -64,7 +64,7 @@ def start_transcription(video_id: str):
6464

6565
audio_path = config.get_audio_path(video_id)
6666

67-
if not os.path.exists(audio_path):
67+
if not expand_path(audio_path).exists():
6868
raise HTTPException(
6969
status_code=404,
7070
detail=f"Audio file not found for video {video_id}. Please stream the video first.",

services/database.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,8 @@ def get_llm_usage_summary(
824824
SUM(prompt_tokens) as total_prompt_tokens,
825825
SUM(response_tokens) as total_response_tokens,
826826
SUM(reasoning_tokens) as total_reasoning_tokens,
827-
SUM(total_tokens) as total_tokens
827+
SUM(total_tokens) as total_tokens,
828+
SUM(audio_duration_seconds) as total_audio_duration_seconds
828829
FROM llm_usage_stats
829830
WHERE 1=1
830831
"""
@@ -864,6 +865,8 @@ def get_llm_usage_summary(
864865
"total_response_tokens": row["total_response_tokens"] or 0,
865866
"total_reasoning_tokens": row["total_reasoning_tokens"] or 0,
866867
"total_tokens": row["total_tokens"] or 0,
868+
"total_audio_duration_seconds": row["total_audio_duration_seconds"]
869+
or 0,
867870
}
868871
results["by_provider_model_feature"].append(stat)
869872

services/tts.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@
55
"""
66

77
import re
8+
import logging
89
from typing import Optional, Literal
910
from elevenlabs.client import ElevenLabs
1011
from bs4 import BeautifulSoup
1112

1213
from services.path_utils import expand_path
1314
from services.llm_clients import get_tracked_openai_client
15+
from services.database import log_llm_usage
16+
17+
logger = logging.getLogger(__name__)
1418

1519
TTSProvider = Literal["openai", "elevenlabs"]
1620

@@ -160,16 +164,20 @@ def _generate_audio_elevenlabs(
160164
api_key: str,
161165
model_id: str = "eleven_flash_v2_5",
162166
output_format: str = "mp3_44100_128",
167+
feature: str = "tts",
168+
video_id: Optional[str] = None,
163169
) -> bytes:
164170
"""
165-
Generate audio using ElevenLabs API.
171+
Generate audio using ElevenLabs API with usage tracking.
166172
167173
Args:
168174
text: Text to convert to speech
169175
voice_id: ElevenLabs voice ID
170176
api_key: ElevenLabs API key
171177
model_id: Model to use for generation
172178
output_format: Audio format
179+
feature: Feature name for tracking (default: "tts")
180+
video_id: Associated video ID for tracking (optional)
173181
174182
Returns:
175183
MP3 audio data as bytes
@@ -205,7 +213,32 @@ def _generate_audio_elevenlabs(
205213
output_format=output_format,
206214
)
207215

208-
return b"".join(audio_generator)
216+
audio_data = b"".join(audio_generator)
217+
218+
# Track usage - ElevenLabs TTS priced per character
219+
try:
220+
metadata = {
221+
"character_count": len(text),
222+
"voice_id": voice_id,
223+
"output_format": output_format,
224+
}
225+
226+
log_llm_usage(
227+
provider="elevenlabs",
228+
model=model_id,
229+
feature=feature,
230+
prompt_tokens=len(text), # Store character count in prompt_tokens
231+
response_tokens=0, # TTS doesn't have response tokens
232+
video_id=video_id,
233+
metadata=metadata,
234+
)
235+
logger.info(
236+
f"ElevenLabs TTS {model_id} call tracked for {feature} ({len(text)} chars)"
237+
)
238+
except Exception as e:
239+
logger.warning(f"Failed to track ElevenLabs TTS usage: {e}")
240+
241+
return audio_data
209242

210243
except Exception as e:
211244
error_msg = str(e)
@@ -271,7 +304,9 @@ def generate_audio(
271304
if not voice:
272305
raise ValueError("ElevenLabs requires a voice_id")
273306
model = model or "eleven_flash_v2_5"
274-
return _generate_audio_elevenlabs(text, voice, api_key, model)
307+
return _generate_audio_elevenlabs(
308+
text, voice, api_key, model, feature=feature, video_id=video_id
309+
)
275310

276311
else:
277312
raise ValueError(f"Unsupported TTS provider: {provider}")

templates/stats.html

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,11 @@ <h2><i class="fas fa-table"></i> Detailed Breakdown</h2>
552552
'gpt-4o-mini': { input: 0.15, output: 0.60 },
553553
'gpt-4o': { input: 2.50, output: 10.00 },
554554
'gpt-5.2': { input: 1.75, output: 14.00 },
555+
// ElevenLabs TTS models (priced per character, stored in prompt_tokens)
556+
'eleven_flash_v2_5': { input: 100.00, output: 0 }, // $0.10 per 1K chars
557+
'eleven_turbo_v2_5': { input: 300.00, output: 0 }, // $0.30 per 1K chars
558+
'eleven_multilingual_v2': { input: 300.00, output: 0 }, // $0.30 per 1K chars
559+
'eleven_monolingual_v1': { input: 300.00, output: 0 }, // $0.30 per 1K chars
555560
// Gemini models (text/summarization)
556561
'gemini-2.5-flash': { input: 0.15, output: 0.60 },
557562
'gemini-2.5-flash-preview-tts': { input: 0.15, output: 0.60 },
@@ -567,16 +572,31 @@ <h2><i class="fas fa-table"></i> Detailed Breakdown</h2>
567572
output: 0.40
568573
};
569574

575+
// Audio transcription pricing (per second)
576+
const AUDIO_PRICING_PER_SECOND = {
577+
'whisper-1': 0.0001, // $0.006 per minute / 60
578+
'voxtral-mini-latest': 0.00005, // $0.003 per minute / 60
579+
};
580+
570581
function calculateCost(summary) {
571582
let totalCost = 0;
572583

573584
summary.by_provider_model_feature.forEach(item => {
574585
const model = item.model;
575586
const feature = item.feature;
576587
const provider = item.provider;
577-
let pricing = MODEL_PRICING[model];
588+
const audioDuration = item.total_audio_duration_seconds || 0;
589+
590+
// Check if this model uses per-second audio pricing
591+
if (AUDIO_PRICING_PER_SECOND[model] && audioDuration > 0) {
592+
const cost = audioDuration * AUDIO_PRICING_PER_SECOND[model];
593+
totalCost += cost;
594+
console.log(`${provider}/${model}/${feature}: ${(audioDuration / 60).toFixed(2)} minutes = $${cost.toFixed(4)}`);
595+
return;
596+
}
578597

579598
// Special handling for Gemini audio transcription
599+
let pricing = MODEL_PRICING[model];
580600
if (provider === 'gemini' && feature === 'transcription') {
581601
pricing = GEMINI_AUDIO_PRICING;
582602
console.log(`Using Gemini audio pricing for ${model} transcription`);
@@ -803,9 +823,18 @@ <h2><i class="fas fa-table"></i> Detailed Breakdown</h2>
803823
const feature = item.feature;
804824
const model = item.model;
805825
const provider = item.provider;
806-
let pricing = MODEL_PRICING[model];
826+
const audioDuration = item.total_audio_duration_seconds || 0;
827+
828+
// Check if this model uses per-second audio pricing
829+
if (AUDIO_PRICING_PER_SECOND[model] && audioDuration > 0) {
830+
const cost = audioDuration * AUDIO_PRICING_PER_SECOND[model];
831+
featureCosts[feature] = (featureCosts[feature] || 0) + cost;
832+
featureCalls[feature] = (featureCalls[feature] || 0) + item.call_count;
833+
return;
834+
}
807835

808836
// Special handling for Gemini audio transcription
837+
let pricing = MODEL_PRICING[model];
809838
if (provider === 'gemini' && feature === 'transcription') {
810839
pricing = GEMINI_AUDIO_PRICING;
811840
}

tests/routes/test_transcription.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,15 +155,19 @@ def test_start_transcription_audio_not_found(
155155
assert "Audio file not found" in response.json()["detail"]
156156

157157
@patch("routes.transcription.get_transcription_queue")
158-
@patch("routes.transcription.os.path.exists")
158+
@patch("routes.transcription.expand_path")
159159
@patch("routes.transcription.config")
160160
def test_start_transcription_success(
161-
self, mock_config, mock_exists, mock_get_queue, client
161+
self, mock_config, mock_expand_path, mock_get_queue, client
162162
):
163163
"""Test successful transcription start."""
164164
mock_config.transcription_enabled = True
165165
mock_config.get_audio_path.return_value = "/tmp/test123.mp3"
166-
mock_exists.return_value = True
166+
167+
# Mock expand_path to return a Mock Path object with exists() returning True
168+
mock_path = Mock()
169+
mock_path.exists.return_value = True
170+
mock_expand_path.return_value = mock_path
167171

168172
mock_queue = Mock()
169173
mock_get_queue.return_value = mock_queue
@@ -182,15 +186,20 @@ def test_start_transcription_success(
182186
assert job.audio_path == "/tmp/test123.mp3"
183187

184188
@patch("routes.transcription.get_transcription_queue")
185-
@patch("routes.transcription.os.path.exists")
189+
@patch("routes.transcription.expand_path")
186190
@patch("routes.transcription.config")
187191
def test_start_transcription_error(
188-
self, mock_config, mock_exists, mock_get_queue, client
192+
self, mock_config, mock_expand_path, mock_get_queue, client
189193
):
190194
"""Test start transcription with error."""
191195
mock_config.transcription_enabled = True
192196
mock_config.get_audio_path.return_value = "/tmp/test123.mp3"
193-
mock_exists.return_value = True
197+
198+
# Mock expand_path to return a Mock Path object with exists() returning True
199+
mock_path = Mock()
200+
mock_path.exists.return_value = True
201+
mock_expand_path.return_value = mock_path
202+
194203
mock_get_queue.side_effect = Exception("Queue error")
195204

196205
response = client.post("/transcription/start/test123")

update.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ echo "----------------------------------------"
6666
uv run python migrate_database.py
6767
uv run python migrate_add_metadata.py
6868
uv run python migrate_add_queue_columns.py
69+
uv run python migrate_add_llm_stats.py
70+
uv run python migrate_add_audio_duration.py
71+
uv run python migrate_add_weekly_summary.py
6972

7073
# Then initialize/update schema (creates tables if they don't exist)
7174
uv run python -c "from services.database import init_database; init_database(); print('Database schema updated successfully')"

0 commit comments

Comments (0)