Skip to content

Commit 346fea4

Browse files
author
Your Name
committed
feat: Add LiteLLM monitoring endpoint, dashboard display, and configuration.
1 parent 89c5bdd commit 346fea4

File tree

5 files changed

+181
-0
lines changed

5 files changed

+181
-0
lines changed

.env.template

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,5 @@ OPENWEATHER_API_KEY=your_openweathermap_api_key
6666
# Prometheus & GPU Metrics
6767
PROMETHEUS_URL=http://kube-prometheus-stack-prometheus.kube-prometheus-stack.svc:9090
6868
GPU_HOT_URL=https://gpu-hot.p.zacharie.org
69+
LITELLM_URL=http://litellm.litellm.svc.cluster.local:4000
70+
LITELLM_API_KEY=your_lite_llm_master_key

src/interfaces/http/handlers/monitoring/mod.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,102 @@ async fn fetch_gpu_and_energy_metrics(client: &reqwest::Client) -> GpuMetrics {
497497
metrics
498498
}
499499

500+
/// LiteLLM Metrics proxy handler
501+
pub async fn litellm_metrics_handler(
502+
axum::extract::State(state): axum::extract::State<crate::state::AppState>,
503+
) -> impl IntoResponse {
504+
let litellm_url = std::env::var("LITELLM_URL")
505+
.unwrap_or_else(|_| "http://litellm.litellm.svc.cluster.local:4000".to_string());
506+
let litellm_key = std::env::var("LITELLM_API_KEY").unwrap_or_default();
507+
508+
if litellm_key.is_empty() {
509+
return api_error(
510+
axum::http::StatusCode::SERVICE_UNAVAILABLE,
511+
"LITELLM_API_KEY not configured",
512+
);
513+
}
514+
515+
// 1. Fetch Health
516+
let health_url = format!("{}/health/readiness", litellm_url);
517+
let health_req = state
518+
.http_client
519+
.get(&health_url)
520+
.header("Authorization", format!("Bearer {}", litellm_key))
521+
.timeout(std::time::Duration::from_secs(5))
522+
.send();
523+
524+
// 2. Fetch Models Info
525+
let models_url = format!("{}/model/info", litellm_url);
526+
let models_req = state
527+
.http_client
528+
.get(&models_url)
529+
.header("Authorization", format!("Bearer {}", litellm_key))
530+
.timeout(std::time::Duration::from_secs(5))
531+
.send();
532+
533+
// 3. Fetch Spend Logs
534+
let spend_url = format!("{}/spend/logs", litellm_url);
535+
let spend_req = state
536+
.http_client
537+
.get(&spend_url)
538+
.header("Authorization", format!("Bearer {}", litellm_key))
539+
.timeout(std::time::Duration::from_secs(5))
540+
.send();
541+
542+
// Execute in parallel
543+
let (health_res, models_res, spend_res) = tokio::join!(health_req, models_req, spend_req);
544+
545+
let mut response_data = serde_json::Map::new();
546+
response_data.insert("litellm_url".to_string(), json!(litellm_url));
547+
548+
// Process Health
549+
match health_res {
550+
Ok(resp) if resp.status().is_success() => {
551+
response_data.insert("healthy".to_string(), json!(true));
552+
}
553+
_ => {
554+
response_data.insert("healthy".to_string(), json!(false));
555+
}
556+
}
557+
558+
// Process Models
559+
if let Ok(resp) = models_res {
560+
if resp.status().is_success() {
561+
if let Ok(data) = resp.json::<serde_json::Value>().await {
562+
response_data.insert("models".to_string(), data.clone());
563+
if let Some(models_list) = data.as_array() {
564+
response_data.insert("model_count".to_string(), json!(models_list.len()));
565+
} else if let Some(data_obj) = data.get("data") {
566+
if let Some(models_list) = data_obj.as_array() {
567+
response_data.insert("model_count".to_string(), json!(models_list.len()));
568+
}
569+
}
570+
}
571+
}
572+
}
573+
574+
// Process Spend
575+
if let Ok(resp) = spend_res {
576+
if resp.status().is_success() {
577+
if let Ok(data) = resp.json::<serde_json::Value>().await {
578+
response_data.insert("spend_data".to_string(), data.clone());
579+
580+
// Try to calculate totals if data is a list of logs
581+
if let Some(logs) = data.as_array() {
582+
let total_spend: f64 = logs
583+
.iter()
584+
.filter_map(|log| log["spend"].as_f64())
585+
.sum();
586+
response_data.insert("total_spend".to_string(), json!(total_spend));
587+
response_data.insert("request_count".to_string(), json!(logs.len()));
588+
}
589+
}
590+
}
591+
}
592+
593+
api_success(json!(response_data))
594+
}
595+
500596
/// Helper to query Prometheus and extract a scalar value
501597
async fn query_prometheus_scalar(client: &reqwest::Client, url: &str, query: &str) -> Option<f64> {
502598
let response = client

src/interfaces/http/routes.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ pub fn configure_routes(state: AppState) -> Router {
139139
.route("/api/monitoring/enphase/debug", get(enphase_debug_handler))
140140
.route("/api/monitoring/gpu/debug", get(gpu_debug_handler))
141141
.route("/api/monitoring/trivy/debug", get(trivy_debug_handler))
142+
.route(
143+
"/api/monitoring/litellm/metrics",
144+
get(litellm_metrics_handler),
145+
)
142146
.route("/api/dashboard/metrics", get(metrics_handler)) // Dashboard metrics
143147
.route("/api/chat", post(post_chat_handler))
144148
.route("/api/prometheus/range", get(prometheus_range_handler))

static/js/dashboard.js

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,10 +643,85 @@ const MetricsManager = {
643643
</p>
644644
</div>
645645
` : ''}
646+
647+
<h3 style="${sectionHeaderStyle}">🤖 LiteLLM AI Gateway</h3>
648+
<div id="litellm-metrics-container">
649+
<div class="loading-mini" style="text-align: center; padding: 2rem; opacity: 0.5;">SCANNING_AI_NEURAL_NET...</div>
650+
</div>
646651
`;
647652

648653
// Load 24h graph data asynchronously
649654
this.loadSolarGraph();
655+
656+
// Load LiteLLM metrics asynchronously
657+
this.loadLitellmMetrics();
658+
},
659+
660+
/**
661+
* Load LiteLLM metrics asynchronously
662+
*/
663+
async loadLitellmMetrics() {
664+
const container = document.getElementById('litellm-metrics-container');
665+
if (!container) return;
666+
667+
try {
668+
const data = await api.get('/api/monitoring/litellm/metrics');
669+
670+
if (!data) throw new Error('No data received');
671+
672+
let modelsHtml = '';
673+
if (data.models && data.models.data) {
674+
// Take top 5 models if many
675+
const models = data.models.data.slice(0, 5);
676+
modelsHtml = `
677+
<div style="margin-top: 1rem; font-size: 0.8rem; border-top: 1px solid rgba(0,255,249,0.1); padding-top: 0.5rem;">
678+
<div style="color: var(--neon-cyan); margin-bottom: 0.5rem; font-family: 'Orbitron'; font-size: 0.7rem;">ACTIVE_MODELS</div>
679+
${models.map(m => `
680+
<div style="display: flex; justify-content: space-between; margin-bottom: 3px; font-family: 'JetBrains Mono';">
681+
<span style="opacity: 0.7;">${m.model_name || m.id}</span>
682+
<span style="color: var(--neon-green);">${m.litellm_params?.model || 'active'}</span>
683+
</div>
684+
`).join('')}
685+
</div>
686+
`;
687+
}
688+
689+
container.innerHTML = `
690+
<div class="metrics-grid">
691+
<div class="metric-card ${data.healthy ? '' : 'alert-critical'}">
692+
<div class="metric-icon">🛡️</div>
693+
<div class="metric-value">${data.healthy ? 'HEALTHY' : 'OFFLINE'}</div>
694+
<div class="metric-label">Gateway Status</div>
695+
</div>
696+
<div class="metric-card">
697+
<div class="metric-icon">🧠</div>
698+
<div class="metric-value">${data.model_count || 0}</div>
699+
<div class="metric-label">Active Models</div>
700+
</div>
701+
<div class="metric-card">
702+
<div class="metric-icon">💳</div>
703+
<div class="metric-value">$${data.total_spend?.toFixed(4) || '0.000'}</div>
704+
<div class="metric-label">Recent Spend</div>
705+
</div>
706+
<div class="metric-card">
707+
<div class="metric-icon">📈</div>
708+
<div class="metric-value">${data.request_count || 0}</div>
709+
<div class="metric-label">Recent Requests</div>
710+
</div>
711+
</div>
712+
${modelsHtml}
713+
`;
714+
} catch (error) {
715+
console.error('LiteLLM metrics error:', error);
716+
container.innerHTML = `
717+
<div style="padding: 1rem; background: rgba(255,0,0,0.05); border-radius: 4px; border-left: 3px solid var(--neon-magenta);">
718+
<p style="margin: 0; font-size: 0.8rem; color: var(--text-secondary);">
719+
⚠️ <strong>LiteLLM Metrics Offline</strong> - Unable to fetch gateway data.<br>
720+
<span style="font-size: 0.7rem; opacity: 0.7;">${error.message}</span>
721+
</p>
722+
</div>
723+
`;
724+
}
650725
},
651726

652727
/**

static/partials/metrics.html

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ <h2 class="section-title">📊 Cluster Metrics (Prometheus)</h2>
4343
style="border-color: var(--neon-orange, #ff8800); color: var(--neon-orange, #ff8800); text-decoration: none; display: flex; align-items: center; gap: 8px;">
4444
<span>🔥</span> Prometheus Query
4545
</a>
46+
<a href="https://litellm.p.zacharie.org/ui" target="_blank" class="cyber-btn"
47+
style="border-color: var(--neon-cyan); color: var(--neon-cyan); text-decoration: none; display: flex; align-items: center; gap: 8px;">
48+
<span>🤖</span> LiteLLM Dashboard
49+
</a>
4650
</div>
4751

4852
<div id="metrics-content">

0 commit comments

Comments
 (0)