From 570d52770ee2a9b324198206969f39b822502737 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Sat, 6 Sep 2025 12:13:27 -0700 Subject: [PATCH 1/6] show cached prompt tokens in metrics --- proxy/metrics_middleware.go | 6 ++++++ proxy/metrics_monitor.go | 2 +- ui/src/contexts/APIProvider.tsx | 1 + ui/src/pages/Activity.tsx | 33 ++++++++++++++++++++++++++++++--- 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/proxy/metrics_middleware.go b/proxy/metrics_middleware.go index adffe973..169b2f2a 100644 --- a/proxy/metrics_middleware.go +++ b/proxy/metrics_middleware.go @@ -73,6 +73,7 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool { } // default values + cachedTokens := -1 // unknown or missing data outputTokens := 0 inputTokens := 0 @@ -93,11 +94,16 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool { promptPerSecond = jsonData.Get("timings.prompt_per_second").Float() tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float() durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float()) + + if cachedValue := jsonData.Get("timings.cache_n"); cachedValue.Exists() { + cachedTokens = int(cachedValue.Int()) + } } rec.metricsMonitor.addMetrics(TokenMetrics{ Timestamp: time.Now(), Model: rec.realModelName, + CachedTokens: cachedTokens, InputTokens: inputTokens, OutputTokens: outputTokens, PromptPerSecond: promptPerSecond, diff --git a/proxy/metrics_monitor.go b/proxy/metrics_monitor.go index 0ce4efda..ee11f2ac 100644 --- a/proxy/metrics_monitor.go +++ b/proxy/metrics_monitor.go @@ -13,6 +13,7 @@ type TokenMetrics struct { ID int `json:"id"` Timestamp time.Time `json:"timestamp"` Model string `json:"model"` + CachedTokens int `json:"cache_tokens"` InputTokens int `json:"input_tokens"` OutputTokens int `json:"output_tokens"` PromptPerSecond float64 `json:"prompt_per_second"` @@ -61,7 +62,6 @@ func (mp *MetricsMonitor) addMetrics(metric TokenMetrics) { if len(mp.metrics) > mp.maxMetrics { mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:] } - event.Emit(TokenMetricsEvent{Metrics: metric}) } diff --git a/ui/src/contexts/APIProvider.tsx b/ui/src/contexts/APIProvider.tsx index 8365cb3a..19a636be 100644 --- a/ui/src/contexts/APIProvider.tsx +++ b/ui/src/contexts/APIProvider.tsx @@ -28,6 +28,7 @@ interface Metrics { id: number; timestamp: string; model: string; + cache_tokens: number; input_tokens: number; output_tokens: number; prompt_per_second: number; diff --git a/ui/src/pages/Activity.tsx b/ui/src/pages/Activity.tsx index de8ab3ec..c43aa149 100644 --- a/ui/src/pages/Activity.tsx +++ b/ui/src/pages/Activity.tsx @@ -13,6 +13,28 @@ const formatDuration = (ms: number): string => { return (ms / 1000).toFixed(2) + "s"; }; +const formatRelativeTime = (timestamp: string): string => { + const now = new Date(); + const date = new Date(timestamp); + const diffInSeconds = Math.floor((now.getTime() - date.getTime()) / 1000); + + if (diffInSeconds < 60) { + return `${diffInSeconds} second${diffInSeconds !== 1 ? "s" : ""} ago`; + } + + const diffInMinutes = Math.floor(diffInSeconds / 60); + if (diffInMinutes < 60) { + return `${diffInMinutes} minute${diffInMinutes !== 1 ? "s" : ""} ago`; + } + + const diffInHours = Math.floor(diffInMinutes / 60); + if (diffInHours < 24) { + return `${diffInHours} hour${diffInHours !== 1 ? "s" : ""} ago`; + } + + return "a long time ago"; +}; + const ActivityPage = () => { const { metrics } = useAPI(); const sortedMetrics = useMemo(() => { @@ -35,8 +57,9 @@ const ActivityPage = () => { Id Timestamp Model - Input Tokens - Output Tokens + Prompt + Cached + Generated Prompt Processing Generation Speed Duration @@ -46,9 +69,13 @@ const ActivityPage = () => { {sortedMetrics.map((metric) => ( {metric.id + 1 /* un-zero index */} - {formatTimestamp(metric.timestamp)} + {formatRelativeTime(metric.timestamp)} {metric.model} {metric.input_tokens.toLocaleString()} + + {metric.cache_tokens > 0 ? metric.cache_tokens.toLocaleString() : "-"} + + {metric.output_tokens.toLocaleString()} {formatSpeed(metric.prompt_per_second)} {formatSpeed(metric.tokens_per_second)} From 9c6193e5f221b7ceb4d267610278382d1c4f5ee0 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Sat, 6 Sep 2025 12:23:14 -0700 Subject: [PATCH 2/6] tidy up typescript --- proxy/metrics_middleware.go | 1 - ui/src/pages/Activity.tsx | 19 ++++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/proxy/metrics_middleware.go b/proxy/metrics_middleware.go index 169b2f2a..734d75ab 100644 --- a/proxy/metrics_middleware.go +++ b/proxy/metrics_middleware.go @@ -61,7 +61,6 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc { } else { writer.metricsRecorder.processNonStreamingResponse(writer.body) } - } } diff --git a/ui/src/pages/Activity.tsx b/ui/src/pages/Activity.tsx index c43aa149..6552be9a 100644 --- a/ui/src/pages/Activity.tsx +++ b/ui/src/pages/Activity.tsx @@ -1,10 +1,6 @@ import { useMemo } from "react"; import { useAPI } from "../contexts/APIProvider"; -const formatTimestamp = (timestamp: string): string => { - return new Date(timestamp).toLocaleString(); -}; - const formatSpeed = (speed: number): string => { return speed < 0 ? "unknown" : speed.toFixed(2) + " t/s"; }; @@ -18,21 +14,26 @@ const formatRelativeTime = (timestamp: string): string => { const date = new Date(timestamp); const diffInSeconds = Math.floor((now.getTime() - date.getTime()) / 1000); + // Handle future dates by returning "just now" + if (diffInSeconds < 0) { + return "now"; + } + if (diffInSeconds < 60) { - return `${diffInSeconds} second${diffInSeconds !== 1 ? "s" : ""} ago`; + return `${diffInSeconds}s ago`; } const diffInMinutes = Math.floor(diffInSeconds / 60); if (diffInMinutes < 60) { - return `${diffInMinutes} minute${diffInMinutes !== 1 ? "s" : ""} ago`; + return `${diffInMinutes}m ago`; } const diffInHours = Math.floor(diffInMinutes / 60); if (diffInHours < 24) { - return `${diffInHours} hour${diffInHours !== 1 ? "s" : ""} ago`; + return `${diffInHours}h ago`; } - return "a long time ago"; + return "a while ago"; }; const ActivityPage = () => { @@ -55,7 +56,7 @@ const ActivityPage = () => { Id - Timestamp + Time Model Prompt Cached From 32b73f9f262435539d29d07cb3f5b5be3723a99d Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Sat, 6 Sep 2025 12:43:23 -0700 Subject: [PATCH 3/6] add tooltip to headers in activity page --- ui/src/pages/Activity.tsx | 40 +++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/ui/src/pages/Activity.tsx b/ui/src/pages/Activity.tsx index 6552be9a..ec0f2696 100644 --- a/ui/src/pages/Activity.tsx +++ b/ui/src/pages/Activity.tsx @@ -1,4 +1,4 @@ -import { useMemo } from "react"; +import { useMemo, useRef, useState, useEffect } from "react"; import { useAPI } from "../contexts/APIProvider"; const formatSpeed = (speed: number): string => { @@ -55,11 +55,16 @@ const ActivityPage = () => { - + - - + + @@ -72,11 +77,10 @@ const ActivityPage = () => { - - + @@ -91,4 +95,28 @@ const ActivityPage = () => { ); }; +interface TooltipProps { + content: string; +} + +const Tooltip: React.FC = ({ content }) => { + return ( +
+ ⓘ +
+ {content} +
+
+
+ ); +}; + export default ActivityPage; From ae004ed51d0081576c9d648bdb8cb2878d0420b4 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Sat, 6 Sep 2025 12:43:48 -0700 Subject: [PATCH 4/6] fix typescript build issues --- ui/src/pages/Activity.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/src/pages/Activity.tsx b/ui/src/pages/Activity.tsx index ec0f2696..6550e489 100644 --- a/ui/src/pages/Activity.tsx +++ b/ui/src/pages/Activity.tsx @@ -1,4 +1,4 @@ -import { useMemo, useRef, useState, useEffect } from "react"; +import { useMemo } from "react"; import { useAPI } from "../contexts/APIProvider"; const formatSpeed = (speed: number): string => { From 3118f86f6dffb19efc1990e76460d752ffcd520c Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Sat, 6 Sep 2025 12:45:51 -0700 Subject: [PATCH 5/6] tweak activity now timestamp --- ui/src/pages/Activity.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/src/pages/Activity.tsx b/ui/src/pages/Activity.tsx index 6550e489..0891b187 100644 --- a/ui/src/pages/Activity.tsx +++ b/ui/src/pages/Activity.tsx @@ -15,7 +15,7 @@ const formatRelativeTime = (timestamp: string): string => { const diffInSeconds = Math.floor((now.getTime() - date.getTime()) / 1000); // Handle future dates by returning "just now" - if (diffInSeconds < 0) { + if (diffInSeconds < 5) { return "now"; } From a023624ccabef87699ff915a2e0d44c276bfd022 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Sat, 6 Sep 2025 12:48:44 -0700 Subject: [PATCH 6/6] add a space --- ui/src/pages/Activity.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ui/src/pages/Activity.tsx b/ui/src/pages/Activity.tsx index 0891b187..772460e3 100644 --- a/ui/src/pages/Activity.tsx +++ b/ui/src/pages/Activity.tsx @@ -62,8 +62,7 @@ const ActivityPage = () => { Cached
IdID Time ModelPromptCached + Cached + + Prompt + + Generated Prompt Processing Generation Speed{metric.id + 1 /* un-zero index */} {formatRelativeTime(metric.timestamp)} {metric.model}{metric.input_tokens.toLocaleString()} {metric.cache_tokens > 0 ? metric.cache_tokens.toLocaleString() : "-"} {metric.input_tokens.toLocaleString()} {metric.output_tokens.toLocaleString()} {formatSpeed(metric.prompt_per_second)} {formatSpeed(metric.tokens_per_second)} - Prompt - + Prompt Generated Prompt Processing