diff --git a/config.example.yaml b/config.example.yaml index 806c500e..7db93d41 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -15,6 +15,11 @@ healthCheckTimeout: 500 # - Valid log levels: debug, info, warn, error logLevel: info +# logHTTPRequests: enable logging of all HTTP requests +# - optional, default: false +# - when true, logs all incoming HTTP request and response bodies +logHTTPRequests: false + # metricsMaxInMemory: maximum number of metrics to keep in memory # - optional, default: 1000 # - controls how many metrics are stored in memory before older ones are discarded diff --git a/proxy/config.go b/proxy/config.go index ee72747d..62138b5a 100644 --- a/proxy/config.go +++ b/proxy/config.go @@ -141,6 +141,7 @@ func (c *GroupConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { type Config struct { HealthCheckTimeout int `yaml:"healthCheckTimeout"` LogRequests bool `yaml:"logRequests"` + LogHTTPRequests bool `yaml:"logHTTPRequests"` LogLevel string `yaml:"logLevel"` MetricsMaxInMemory int `yaml:"metricsMaxInMemory"` Models map[string]ModelConfig `yaml:"models"` /* key is model ID */ @@ -193,6 +194,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { // default configuration values config := Config{ HealthCheckTimeout: 120, + LogHTTPRequests: false, StartPort: 5800, LogLevel: "info", MetricsMaxInMemory: 1000, diff --git a/proxy/metrics_middleware.go b/proxy/metrics_middleware.go index ee17717f..48f142c2 100644 --- a/proxy/metrics_middleware.go +++ b/proxy/metrics_middleware.go @@ -36,14 +36,18 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc { return } + metricsRecorder := &MetricsRecorder{ + metricsMonitor: pm.metricsMonitor, + realModelName: realModelName, + isStreaming: gjson.GetBytes(bodyBytes, "stream").Bool(), + startTime: time.Now(), + } + if pm.metricsMonitor.logHTTPRequests { + metricsRecorder.requestBody = bodyBytes + } writer := &MetricsResponseWriter{ - ResponseWriter: c.Writer, - metricsRecorder: &MetricsRecorder{ - metricsMonitor: pm.metricsMonitor, - realModelName: realModelName, - isStreaming: gjson.GetBytes(bodyBytes, "stream").Bool(), - startTime: time.Now(), - }, + ResponseWriter: c.Writer, + metricsRecorder: metricsRecorder, } c.Writer = writer c.Next() @@ -58,6 +62,7 @@ type MetricsRecorder struct { realModelName string isStreaming bool startTime time.Time + requestBody []byte } // processBody handles response processing after request completes @@ -69,7 +74,8 @@ func (rec *MetricsRecorder) processBody(body []byte) { } } -func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool { +func (rec *MetricsRecorder) parseAndRecordMetrics(responseBody []byte) bool { + jsonData := gjson.ParseBytes(responseBody) usage := jsonData.Get("usage") if !usage.Exists() { return false @@ -87,14 +93,19 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool { durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float()) } - rec.metricsMonitor.addMetrics(TokenMetrics{ + metrics := TokenMetrics{ Timestamp: time.Now(), Model: rec.realModelName, InputTokens: inputTokens, OutputTokens: outputTokens, TokensPerSecond: tokensPerSecond, DurationMs: durationMs, - }) + } + if rec.metricsMonitor.logHTTPRequests { + metrics.RequestBody = string(rec.requestBody) + metrics.ResponseBody = string(responseBody) + } + rec.metricsMonitor.addMetrics(metrics) return true } @@ -126,10 +137,8 @@ func (rec *MetricsRecorder) processStreamingResponse(body []byte) { continue } - if gjson.ValidBytes(data) { - if rec.parseAndRecordMetrics(gjson.ParseBytes(data)) { - return // short circuit if a metric was recorded - } + if gjson.ValidBytes(data) && rec.parseAndRecordMetrics(data) { + return // short circuit if a metric was recorded } } } @@ -141,7 +150,7 @@ func (rec *MetricsRecorder) processNonStreamingResponse(body []byte) { // Parse JSON to extract usage information if gjson.ValidBytes(body) { - rec.parseAndRecordMetrics(gjson.ParseBytes(body)) + rec.parseAndRecordMetrics(body) } } diff --git a/proxy/metrics_monitor.go b/proxy/metrics_monitor.go index 050b95e0..dd1efbea 100644 --- a/proxy/metrics_monitor.go +++ b/proxy/metrics_monitor.go @@ -17,6 +17,8 @@ type TokenMetrics struct { OutputTokens int `json:"output_tokens"` TokensPerSecond float64 `json:"tokens_per_second"` DurationMs int `json:"duration_ms"` + RequestBody string `json:"request_body,omitempty"` + ResponseBody string `json:"response_body,omitempty"` } // TokenMetricsEvent represents a token metrics event @@ -30,10 +32,11 @@ func (e TokenMetricsEvent) Type() uint32 { // MetricsMonitor parses llama-server output for token statistics type MetricsMonitor struct { - mu sync.RWMutex - metrics []TokenMetrics - maxMetrics int - nextID int + mu sync.RWMutex + metrics []TokenMetrics + maxMetrics int + nextID int + logHTTPRequests bool } func NewMetricsMonitor(config *Config) *MetricsMonitor { @@ -43,13 +46,15 @@ func NewMetricsMonitor(config *Config) *MetricsMonitor { } mp := &MetricsMonitor{ - maxMetrics: maxMetrics, + maxMetrics: maxMetrics, + logHTTPRequests: config.LogHTTPRequests, } return mp } // addMetrics adds a new metric to the collection and publishes an event +// If logHTTPRequests is enabled, it records the request and response bodies func (mp *MetricsMonitor) addMetrics(metric TokenMetrics) { mp.mu.Lock() defer mp.mu.Unlock() diff --git a/proxy/proxymanager_test.go b/proxy/proxymanager_test.go index 83c5bdc6..3c05ce82 100644 --- a/proxy/proxymanager_test.go +++ b/proxy/proxymanager_test.go @@ -773,3 +773,43 @@ func TestProxyManager_HealthEndpoint(t *testing.T) { assert.Equal(t, http.StatusOK, rec.Code) assert.Equal(t, "OK", rec.Body.String()) } + +func TestProxyManager_RequestResponseBodyIsRecorded(t *testing.T) { + // Create config with logHTTPRequests enabled + config := AddDefaultGroupToConfig(Config{ + HealthCheckTimeout: 15, + Models: map[string]ModelConfig{ + "model1": getTestSimpleResponderConfig("model1"), + }, + LogLevel: "error", + LogHTTPRequests: true, + MetricsMaxInMemory: 100, + }) + + proxy := New(config) + defer proxy.StopProcesses(StopWaitForInflightRequest) + + // Make a request + reqBody := `{"model":"model1", "prompt": "test prompt"}` + req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody)) + w := httptest.NewRecorder() + + proxy.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + + // Check that metrics were recorded + metrics := proxy.metricsMonitor.GetMetrics() + if !assert.NotEmpty(t, metrics, "metrics should be recorded") { + return + } + + // Verify the last metric has request and response bodies + lastMetric := metrics[len(metrics)-1] + assert.NotEmpty(t, lastMetric.RequestBody, "request body should be recorded when logHTTPRequests is true") + assert.NotEmpty(t, lastMetric.ResponseBody, "response body should be recorded when logHTTPRequests is true") + + // Verify the content matches what we sent and received + assert.Contains(t, lastMetric.RequestBody, "model1", "request body should contain the model name") + assert.Contains(t, lastMetric.RequestBody, "test prompt", "request body should contain the prompt") + assert.Contains(t, lastMetric.ResponseBody, "model1", "response body should contain the model name") +} diff --git a/ui/src/contexts/APIProvider.tsx b/ui/src/contexts/APIProvider.tsx index f5287773..d548d282 100644 --- a/ui/src/contexts/APIProvider.tsx +++ b/ui/src/contexts/APIProvider.tsx @@ -30,6 +30,8 @@ interface Metrics { output_tokens: number; tokens_per_second: number; duration_ms: number; + request_body?: string; + response_body?: string; } interface LogData { diff --git a/ui/src/pages/Activity.tsx b/ui/src/pages/Activity.tsx index 70cae421..6d952e79 100644 --- a/ui/src/pages/Activity.tsx +++ b/ui/src/pages/Activity.tsx @@ -1,4 +1,4 @@ -import { useState, useEffect } from "react"; +import { useState, useEffect, Fragment } from "react"; import { useAPI } from "../contexts/APIProvider"; const formatTimestamp = (timestamp: string): string => { @@ -16,6 +16,8 @@ const formatDuration = (ms: number): string => { const ActivityPage = () => { const { metrics } = useAPI(); const [error, setError] = useState(null); + const [expandedMetrics, setExpandedMetrics] = useState>(new Set()); + const [parseJson, setParseJson] = useState(false); useEffect(() => { if (metrics.length > 0) { @@ -23,6 +25,17 @@ const ActivityPage = () => { } }, [metrics]); + const beautifyJson = (jsonString?: string): string => { + if (typeof jsonString !== "string") + return ""; + try { + const parsed = JSON.parse(jsonString); + return JSON.stringify(parsed, null, 2); + } catch (e) { + return jsonString; + } + }; + if (error) { return (
@@ -34,10 +47,79 @@ const ActivityPage = () => { ); } + const toggleExpanded = (id: string) => { + setExpandedMetrics(prev => { + const newSet = new Set(prev); + if (newSet.has(id)) { + newSet.delete(id); + } else { + newSet.add(id); + } + return newSet; + }); + }; + + const renderMetricRow = (metric: typeof metrics[0], index: number) => { + const key = `${metric.id}-${index}`; + const isExpanded = expandedMetrics.has(key); + const hasRequestData = metric.request_body && metric.response_body; + + return ( + + + {formatTimestamp(metric.timestamp)} + {metric.model} + {metric.input_tokens.toLocaleString()} + {metric.output_tokens.toLocaleString()} + {formatSpeed(metric.tokens_per_second)} + {formatDuration(metric.duration_ms)} + {hasRequestData && ( + + + + )} + + {isExpanded && hasRequestData && ( + + +
+

Request

+
+                  {parseJson ? beautifyJson(metric.request_body) : metric.request_body}
+                
+

Response

+
+                  {parseJson ? beautifyJson(metric.response_body) : metric.response_body}
+                
+
+ + + )} +
+ ); + }; + return (

Activity

+
+ setParseJson(e.target.checked)} + /> + +
+ {metrics.length === 0 ? (

No metrics data available

@@ -53,19 +135,11 @@ const ActivityPage = () => { Output Tokens Generation Speed Duration + Request data - {metrics.map((metric, index) => ( - - {formatTimestamp(metric.timestamp)} - {metric.model} - {metric.input_tokens.toLocaleString()} - {metric.output_tokens.toLocaleString()} - {formatSpeed(metric.tokens_per_second)} - {formatDuration(metric.duration_ms)} - - ))} + {metrics.map(renderMetricRow)}