Set the cost per model

MrDesjardins · MrDesjardins · commit 102f775e1ccd · 2026-02-02T21:16:13.000-08:00
diff --git a/README.md b/README.md
@@ -295,14 +295,49 @@ curl "http://localhost:8000/admin/weekly-summary/next-run"
 
 ## Costs
 
-Approximate API costs per operation:
-- **Whisper transcription**: $0.006 per minute of audio
-- **GPT-4o-mini summarization**: ~$0.001-0.01 per summary (avg 1,500 tokens)
-- **GPT-4o weekly summary**: ~$0.05-0.15 per summary (avg 10,000 tokens)
-- **Gemini**: Free tier available (15 requests/minute, 1 million tokens/day)
+### Current Model Pricing (Per 1M Tokens)
+
+| Model | Input | Output | Notes |
+|-------|-------|--------|-------|
+| **OpenAI** ||||
+| gpt-5-nano | $0.05 | $0.40 | Ultra-lightweight for high-volume tasks |
+| gpt-4o-mini | $0.15 | $0.60 | Reliable workhorse (recommended) |
+| gpt-4o | $2.50 | $10.00 | Higher quality, stable pricing |
+| gpt-5.2 | $1.75 | $14.00 | Extended thinking capacity |
+| whisper-1 | - | - | $0.006 per minute of audio |
+| **Google Gemini** ||||
+| gemini-2.5-flash | $0.15 | $0.60 | Fast, comparable to gpt-4o-mini (recommended) |
+| gemini-3-flash-preview | $0.50 | $3.00 | Speed-optimized preview |
+| gemini-3-pro-preview | $2.00 | $12.00 | High quality (≤200k context) |
+
+### Estimated Costs Per Operation
+
+**Using default configuration (Whisper + Gemini 2.5 Flash):**
+- **Video transcription** (Whisper): $0.006 per minute of audio
+- **Video summarization** (Gemini 2.5 Flash): ~$0.0003-0.001 per summary
+  - Typical: 2,000 input tokens (transcript) + 500 output tokens
+  - Cost: (2,000 × $0.15 + 500 × $0.60) / 1,000,000 = **$0.0006**
+- **Weekly summary** (Gemini 2.5 Flash): ~$0.002-0.01 per summary
+  - Typical: 10,000 input tokens + 2,000 output tokens
+  - Cost: (10,000 × $0.15 + 2,000 × $0.60) / 1,000,000 = **$0.0027**
+- **Book suggestions** (Gemini 2.5 Flash): ~$0.0002-0.0005 per request
+  - Typical: 1,000 input tokens + 100 output tokens
+  - Cost: (1,000 × $0.15 + 100 × $0.60) / 1,000,000 = **$0.0002**
+
+**Example monthly cost** (watching 30 hours/month, assumed here to be 30 one-hour videos):
+- Transcription: 30 hours × 60 min × $0.006 = **$10.80**
+- Summarization: 30 videos × $0.0006 = **$0.02**
+- Weekly summaries: 4 weeks × $0.0027 = **$0.01**
+- **Total: ~$10.83/month**
+
+**Gemini Free Tier:**
+- 15 requests per minute
+- 1 million tokens per day
+- Summarization and weekly summaries are essentially free under these limits
+- Only transcription (Whisper) has costs
 
 **Cost tracking:**
-Use the LLM usage statistics endpoints above to monitor your actual usage and calculate precise costs based on current provider pricing.
+Use the `/admin/llm-usage/stats` and `/admin/llm-usage/summary` endpoints to monitor your actual usage and calculate precise costs based on current provider pricing.
 
 # Server configuration
 
diff --git a/templates/stats.html b/templates/stats.html
@@ -432,6 +432,50 @@ <h2><i class="fas fa-table"></i> Detailed Breakdown</h2>
         let providerChart = null;
         let featureChart = null;
 
+        // Model pricing in USD per 1M tokens: `input` is applied to prompt tokens, `output` to response tokens
+        const MODEL_PRICING = {
+            // OpenAI models
+            'whisper-1': { input: 0, output: 0 }, // Whisper is priced per minute, not tokens, so it adds $0 to token-based estimates
+            'gpt-5-nano': { input: 0.05, output: 0.40 },
+            'gpt-4o-mini': { input: 0.15, output: 0.60 },
+            'gpt-4o': { input: 2.50, output: 10.00 },
+            'gpt-5.2': { input: 1.75, output: 14.00 },
+            // Gemini models
+            'gemini-2.5-flash': { input: 0.15, output: 0.60 },
+            'gemini-2.5-flash-preview-tts': { input: 0.15, output: 0.60 },
+            'gemini-3-flash-preview': { input: 0.50, output: 3.00 },
+            'gemini-3-pro-preview': { input: 2.00, output: 12.00 },
+            'gemini-1.5-flash': { input: 0.15, output: 0.60 },
+            'gemini-1.5-pro': { input: 2.00, output: 12.00 },
+        };
+
+        function calculateCost(summary) {
+            let totalCost = 0;
+            // Accumulate the estimated USD cost of each entry in summary.by_provider_model_feature; the unrounded total is returned
+            summary.by_provider_model_feature.forEach(item => {
+                const model = item.model;
+                const pricing = MODEL_PRICING[model];
+
+                if (pricing) {
+                    const inputTokens = item.total_prompt_tokens || 0;
+                    const outputTokens = item.total_response_tokens || 0;
+
+                    // Calculate cost: (tokens / 1,000,000) * price_per_million
+                    const inputCost = (inputTokens / 1000000) * pricing.input;
+                    const outputCost = (outputTokens / 1000000) * pricing.output;
+
+                    totalCost += inputCost + outputCost;
+                } else {
+                    // Unknown model - no per-model price, so charge a flat fallback rate on the combined token count
+                    console.warn(`Unknown model pricing: ${model}, using average rate`);
+                    const totalTokens = item.total_tokens || 0;
+                    totalCost += (totalTokens / 1000000) * 1.0; // $1 per 1M tokens fallback
+                }
+            });
+
+            return totalCost;
+        }
+
         async function loadStatistics() {
             try {
                 document.getElementById('loading').style.display = 'block';
@@ -478,9 +522,18 @@ <h2><i class="fas fa-table"></i> Detailed Breakdown</h2>
             document.getElementById('token-breakdown').textContent =
                 `${totals.total_prompt_tokens.toLocaleString()} input / ${totals.total_response_tokens.toLocaleString()} output`;
 
-            // Estimate cost (rough average: $0.000001 per token)
-            const estimatedCost = (totals.total_tokens * 0.000001).toFixed(2);
-            document.getElementById('estimated-cost').textContent = `$${estimatedCost}`;
+            // Calculate accurate cost based on actual model pricing
+            const estimatedCost = calculateCost(summary);
+            document.getElementById('estimated-cost').textContent = `$${estimatedCost.toFixed(4)}`;
+
+            // Update sub-label to show it's actual pricing
+            const costCard = document.getElementById('estimated-cost').parentElement;
+            const subLabel = costCard.querySelector('.stat-sub');
+            if (estimatedCost < 0.01) {
+                subLabel.textContent = `≈ $${(estimatedCost * 100).toFixed(2)}¢`;
+            } else {
+                subLabel.textContent = 'Based on actual model pricing';
+            }
 
             // Find most used
             if (summary.by_provider_model_feature.length > 0) {
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -29,26 +29,42 @@ def mock_config():
     return config
 
 
-@pytest.fixture
-def temp_db():
-    """Temporary SQLite database for testing."""
+@pytest.fixture(autouse=True)
+def db_path(monkeypatch):
+    """Temporary SQLite database for testing.
+
+    This fixture runs automatically for ALL tests to ensure they never
+    touch the development database.
+
+    Renamed from temp_db to db_path to match existing test expectations.
+    """
     with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
-        db_path = f.name
+        temp_path = f.name
 
-    # Set environment variable for database
-    original_db = os.environ.get("DATABASE_PATH")
-    os.environ["DATABASE_PATH"] = db_path
+    # Set environment variable for database using monkeypatch
+    # This ensures proper cleanup even if tests fail
+    monkeypatch.setenv("DATABASE_PATH", temp_path)
 
-    yield db_path
+    # Force reload of database module to pick up new DATABASE_PATH
+    import services.database
+    import importlib
 
-    # Cleanup
-    if original_db:
-        os.environ["DATABASE_PATH"] = original_db
-    else:
-        os.environ.pop("DATABASE_PATH", None)
+    importlib.reload(services.database)
 
-    if os.path.exists(db_path):
-        os.unlink(db_path)
+    # Initialize the database with all required tables
+    from services.database import init_database
+
+    init_database()
+
+    yield temp_path
+
+    # Cleanup
+    if os.path.exists(temp_path):
+        try:
+            os.unlink(temp_path)
+        except Exception:
+            # Ignore cleanup errors
+            pass
 
 
 @pytest.fixture
diff --git a/tests/services/test_database.py b/tests/services/test_database.py
@@ -1,8 +1,5 @@
 """Tests for database service."""
 
-import os
-import tempfile
-import pytest
 from services.database import (
     init_database,
     add_to_history,
@@ -17,30 +14,8 @@
     get_db_connection,
 )
 
-
-@pytest.fixture(autouse=True)
-def db_path(monkeypatch):
-    """Create temporary database for testing."""
-    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
-        path = f.name
-
-    # Set environment variable BEFORE importing services
-    monkeypatch.setenv("DATABASE_PATH", path)
-
-    # Reload the database module to pick up new path
-    import services.database
-    import importlib
-
-    importlib.reload(services.database)
-
-    yield path
-
-    # Cleanup
-    if os.path.exists(path):
-        try:
-            os.unlink(path)
-        except Exception:
-            pass
+# Note: The db_path fixture from conftest.py is used automatically
+# for all tests (autouse=True), so no need to define it here
 
 
 class TestDatabaseInit: