diff --git a/proxy/metrics_middleware.go b/proxy/metrics_middleware.go index adffe973..734d75ab 100644 --- a/proxy/metrics_middleware.go +++ b/proxy/metrics_middleware.go @@ -61,7 +61,6 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc { } else { writer.metricsRecorder.processNonStreamingResponse(writer.body) } - } } @@ -73,6 +72,7 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool { } // default values + cachedTokens := -1 // unknown or missing data outputTokens := 0 inputTokens := 0 @@ -93,11 +93,16 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool { promptPerSecond = jsonData.Get("timings.prompt_per_second").Float() tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float() durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float()) + + if cachedValue := jsonData.Get("timings.cache_n"); cachedValue.Exists() { + cachedTokens = int(cachedValue.Int()) + } } rec.metricsMonitor.addMetrics(TokenMetrics{ Timestamp: time.Now(), Model: rec.realModelName, + CachedTokens: cachedTokens, InputTokens: inputTokens, OutputTokens: outputTokens, PromptPerSecond: promptPerSecond, diff --git a/proxy/metrics_monitor.go b/proxy/metrics_monitor.go index 0ce4efda..ee11f2ac 100644 --- a/proxy/metrics_monitor.go +++ b/proxy/metrics_monitor.go @@ -13,6 +13,7 @@ type TokenMetrics struct { ID int `json:"id"` Timestamp time.Time `json:"timestamp"` Model string `json:"model"` + CachedTokens int `json:"cache_tokens"` InputTokens int `json:"input_tokens"` OutputTokens int `json:"output_tokens"` PromptPerSecond float64 `json:"prompt_per_second"` @@ -61,7 +62,6 @@ func (mp *MetricsMonitor) addMetrics(metric TokenMetrics) { if len(mp.metrics) > mp.maxMetrics { mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:] } - event.Emit(TokenMetricsEvent{Metrics: metric}) } diff --git a/ui/src/contexts/APIProvider.tsx b/ui/src/contexts/APIProvider.tsx index 8365cb3a..19a636be 
100644 --- a/ui/src/contexts/APIProvider.tsx +++ b/ui/src/contexts/APIProvider.tsx @@ -28,6 +28,7 @@ interface Metrics { id: number; timestamp: string; model: string; + cache_tokens: number; input_tokens: number; output_tokens: number; prompt_per_second: number; diff --git a/ui/src/pages/Activity.tsx b/ui/src/pages/Activity.tsx index de8ab3ec..772460e3 100644 --- a/ui/src/pages/Activity.tsx +++ b/ui/src/pages/Activity.tsx @@ -1,10 +1,6 @@ import { useMemo } from "react"; import { useAPI } from "../contexts/APIProvider"; -const formatTimestamp = (timestamp: string): string => { - return new Date(timestamp).toLocaleString(); -}; - const formatSpeed = (speed: number): string => { return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s"; }; @@ -13,6 +9,33 @@ const formatDuration = (ms: number): string => { return (ms / 1000).toFixed(2) + "s"; }; +const formatRelativeTime = (timestamp: string): string => { + const now = new Date(); + const date = new Date(timestamp); + const diffInSeconds = Math.floor((now.getTime() - date.getTime()) / 1000); + + // Treat future timestamps and anything less than 5 seconds old as "now" + if (diffInSeconds < 5) { + return "now"; + } + + if (diffInSeconds < 60) { + return `${diffInSeconds}s ago`; + } + + const diffInMinutes = Math.floor(diffInSeconds / 60); + if (diffInMinutes < 60) { + return `${diffInMinutes}m ago`; + } + + const diffInHours = Math.floor(diffInMinutes / 60); + if (diffInHours < 24) { + return `${diffInHours}h ago`; + } + + return "a while ago"; +}; + const ActivityPage = () => { const { metrics } = useAPI(); const sortedMetrics = useMemo(() => { @@ -32,11 +55,16 @@ const ActivityPage = () => { - - + + - - + + + @@ -46,8 +74,11 @@ const ActivityPage = () => { {sortedMetrics.map((metric) => ( - + + @@ -63,4 +94,28 @@ const ActivityPage = () => { ); }; +interface TooltipProps { + content: string; +} + +const Tooltip: React.FC = ({ content }) => { + return ( +
+ ⓘ +
+ {content} +
+
+
+ ); +}; + export default ActivityPage;
IdTimestampIDTime ModelInput TokensOutput Tokens + Cached + + Prompt + Generated Prompt Processing Generation Speed Duration
{metric.id + 1 /* un-zero index */}{formatTimestamp(metric.timestamp)}{formatRelativeTime(metric.timestamp)} {metric.model} + {metric.cache_tokens > 0 ? metric.cache_tokens.toLocaleString() : "-"} + {metric.input_tokens.toLocaleString()} {metric.output_tokens.toLocaleString()} {formatSpeed(metric.prompt_per_second)}