feat: Add LiteLLM monitoring endpoint, dashboard display, and configuration.

Your Name · Your Name · commit 346fea4a8d48 · 2026-03-06T07:48:48.000+01:00
diff --git a/.env.template b/.env.template
@@ -66,3 +66,5 @@ OPENWEATHER_API_KEY=your_openweathermap_api_key
 # Prometheus & GPU Metrics
 PROMETHEUS_URL=http://kube-prometheus-stack-prometheus.kube-prometheus-stack.svc:9090
 GPU_HOT_URL=https://gpu-hot.p.zacharie.org
+LITELLM_URL=http://litellm.litellm.svc.cluster.local:4000
+LITELLM_API_KEY=your_lite_llm_master_key
diff --git a/src/interfaces/http/handlers/monitoring/mod.rs b/src/interfaces/http/handlers/monitoring/mod.rs
@@ -497,6 +497,102 @@ async fn fetch_gpu_and_energy_metrics(client: &reqwest::Client) -> GpuMetrics {
     metrics
 }
 
+/// LiteLLM metrics proxy handler.
+///
+/// Queries the LiteLLM gateway at `LITELLM_URL` (default: the in-cluster service
+/// address) for readiness, model info and spend logs, concurrently, and merges
+/// the results into one JSON object.
+///
+/// Each upstream request is authenticated with the API key as a bearer token
+/// and limited to a 5 second timeout.
+///
+/// Responds with `503 Service Unavailable` when `LITELLM_API_KEY` is unset or
+/// empty. Upstream failures are best-effort: a failed or non-2xx readiness probe
+/// yields `healthy: false`, and a failed models/spend call just omits its keys.
+pub async fn litellm_metrics_handler(
+    axum::extract::State(state): axum::extract::State<crate::state::AppState>,
+) -> impl IntoResponse {
+    let litellm_key = std::env::var("LITELLM_API_KEY").unwrap_or_default();
+    if litellm_key.is_empty() {
+        return api_error(
+            axum::http::StatusCode::SERVICE_UNAVAILABLE,
+            "LITELLM_API_KEY not configured",
+        );
+    }
+
+    // Strip trailing slashes so a value like `http://host:4000/` does not
+    // produce `//health/readiness` once the endpoint paths are appended.
+    let litellm_url = std::env::var("LITELLM_URL")
+        .unwrap_or_else(|_| "http://litellm.litellm.svc.cluster.local:4000".to_string())
+        .trim_end_matches('/')
+        .to_string();
+
+    // All three calls share the same auth header and timeout.
+    let bearer = format!("Bearer {}", litellm_key);
+    let fetch = |path: &str| {
+        state
+            .http_client
+            .get(format!("{}{}", litellm_url, path))
+            .header("Authorization", bearer.as_str())
+            .timeout(std::time::Duration::from_secs(5))
+            .send()
+    };
+
+    // Execute in parallel
+    let (health_res, models_res, spend_res) = tokio::join!(
+        fetch("/health/readiness"),
+        fetch("/model/info"),
+        fetch("/spend/logs"),
+    );
+
+    let mut response_data = serde_json::Map::new();
+    response_data.insert("litellm_url".to_string(), json!(litellm_url));
+
+    // Process Health: anything other than a 2xx answer counts as unhealthy.
+    let healthy = matches!(&health_res, Ok(resp) if resp.status().is_success());
+    response_data.insert("healthy".to_string(), json!(healthy));
+
+    // Process Models: the payload is either a bare array or `{ "data": [...] }`.
+    if let Ok(resp) = models_res {
+        if resp.status().is_success() {
+            if let Ok(data) = resp.json::<serde_json::Value>().await {
+                // Derive the count first so `data` can be moved instead of cloned.
+                let model_count = data
+                    .as_array()
+                    .or_else(|| data.get("data").and_then(serde_json::Value::as_array))
+                    .map(Vec::len);
+                response_data.insert("models".to_string(), data);
+                if let Some(count) = model_count {
+                    response_data.insert("model_count".to_string(), json!(count));
+                }
+            }
+        }
+    }
+
+    // Process Spend
+    if let Ok(resp) = spend_res {
+        if resp.status().is_success() {
+            if let Ok(data) = resp.json::<serde_json::Value>().await {
+                // Totals are only derivable when the payload is a list of logs.
+                let totals = data.as_array().map(|logs| {
+                    let spend: f64 = logs
+                        .iter()
+                        .filter_map(|log| log.get("spend").and_then(serde_json::Value::as_f64))
+                        .sum();
+                    (spend, logs.len())
+                });
+                response_data.insert("spend_data".to_string(), data);
+                if let Some((total_spend, request_count)) = totals {
+                    response_data.insert("total_spend".to_string(), json!(total_spend));
+                    response_data.insert("request_count".to_string(), json!(request_count));
+                }
+            }
+        }
+    }
+
+    api_success(json!(response_data))
+}
+
 /// Helper to query Prometheus and extract a scalar value
 async fn query_prometheus_scalar(client: &reqwest::Client, url: &str, query: &str) -> Option<f64> {
     let response = client
diff --git a/src/interfaces/http/routes.rs b/src/interfaces/http/routes.rs
@@ -139,6 +139,10 @@ pub fn configure_routes(state: AppState) -> Router {
         .route("/api/monitoring/enphase/debug", get(enphase_debug_handler))
         .route("/api/monitoring/gpu/debug", get(gpu_debug_handler))
         .route("/api/monitoring/trivy/debug", get(trivy_debug_handler))
+        .route(
+            "/api/monitoring/litellm/metrics",
+            get(litellm_metrics_handler),
+        )
         .route("/api/dashboard/metrics", get(metrics_handler)) // Dashboard metrics
         .route("/api/chat", post(post_chat_handler))
         .route("/api/prometheus/range", get(prometheus_range_handler))
diff --git a/static/js/dashboard.js b/static/js/dashboard.js
@@ -643,10 +643,85 @@ const MetricsManager = {
                 </p>
             </div>
             ` : ''}
+            
+            <h3 style="${sectionHeaderStyle}">🤖 LiteLLM AI Gateway</h3>
+            <div id="litellm-metrics-container">
+                <div class="loading-mini" style="text-align: center; padding: 2rem; opacity: 0.5;">SCANNING_AI_NEURAL_NET...</div>
+            </div>
         `;
 
         // Load 24h graph data asynchronously
         this.loadSolarGraph();
+
+        // Load LiteLLM metrics asynchronously
+        this.loadLitellmMetrics();
+    },
+
+    /**
+     * Load LiteLLM metrics asynchronously
+     */
+    async loadLitellmMetrics() {
+        const container = document.getElementById('litellm-metrics-container');
+        if (!container) return;
+
+        try {
+            const data = await api.get('/api/monitoring/litellm/metrics');
+
+            if (!data) throw new Error('No data received');
+
+            let modelsHtml = '';
+            if (data.models && data.models.data) {
+                // Take top 5 models if many
+                const models = data.models.data.slice(0, 5);
+                modelsHtml = `
+                    <div style="margin-top: 1rem; font-size: 0.8rem; border-top: 1px solid rgba(0,255,249,0.1); padding-top: 0.5rem;">
+                        <div style="color: var(--neon-cyan); margin-bottom: 0.5rem; font-family: 'Orbitron'; font-size: 0.7rem;">ACTIVE_MODELS</div>
+                        ${models.map(m => `
+                            <div style="display: flex; justify-content: space-between; margin-bottom: 3px; font-family: 'JetBrains Mono';">
+                                <span style="opacity: 0.7;">${m.model_name || m.id}</span>
+                                <span style="color: var(--neon-green);">${m.litellm_params?.model || 'active'}</span>
+                            </div>
+                        `).join('')}
+                    </div>
+                `;
+            }
+
+            container.innerHTML = `
+                <div class="metrics-grid">
+                    <div class="metric-card ${data.healthy ? '' : 'alert-critical'}">
+                        <div class="metric-icon">🛡️</div>
+                        <div class="metric-value">${data.healthy ? 'HEALTHY' : 'OFFLINE'}</div>
+                        <div class="metric-label">Gateway Status</div>
+                    </div>
+                    <div class="metric-card">
+                        <div class="metric-icon">🧠</div>
+                        <div class="metric-value">${data.model_count || 0}</div>
+                        <div class="metric-label">Active Models</div>
+                    </div>
+                    <div class="metric-card">
+                        <div class="metric-icon">💳</div>
+                        <div class="metric-value">$${data.total_spend?.toFixed(4) || '0.000'}</div>
+                        <div class="metric-label">Recent Spend</div>
+                    </div>
+                    <div class="metric-card">
+                        <div class="metric-icon">📈</div>
+                        <div class="metric-value">${data.request_count || 0}</div>
+                        <div class="metric-label">Recent Requests</div>
+                    </div>
+                </div>
+                ${modelsHtml}
+            `;
+        } catch (error) {
+            console.error('LiteLLM metrics error:', error);
+            container.innerHTML = `
+                <div style="padding: 1rem; background: rgba(255,0,0,0.05); border-radius: 4px; border-left: 3px solid var(--neon-magenta);">
+                    <p style="margin: 0; font-size: 0.8rem; color: var(--text-secondary);">
+                        ⚠️ <strong>LiteLLM Metrics Offline</strong> - Unable to fetch gateway data.<br>
+                        <span style="font-size: 0.7rem; opacity: 0.7;">${error.message}</span>
+                    </p>
+                </div>
+            `;
+        }
     },
 
     /**
diff --git a/static/partials/metrics.html b/static/partials/metrics.html
@@ -43,6 +43,10 @@ <h2 class="section-title">📊 Cluster Metrics (Prometheus)</h2>
         style="border-color: var(--neon-orange, #ff8800); color: var(--neon-orange, #ff8800); text-decoration: none; display: flex; align-items: center; gap: 8px;">
         <span>🔥</span> Prometheus Query
     </a>
+    <a href="https://litellm.p.zacharie.org/ui" target="_blank" rel="noopener noreferrer" class="cyber-btn"
+        style="border-color: var(--neon-cyan); color: var(--neon-cyan); text-decoration: none; display: flex; align-items: center; gap: 8px;">
+        <span>🤖</span> LiteLLM Dashboard
+    </a>
 </div>
 
 <div id="metrics-content">