Skip to content

Commit dea9873

Browse files
authored
proxy: extract metrics for v1/messages (#419)
1 parent bccce5f commit dea9873

File tree

1 file changed

+43
-19
lines changed

1 file changed

+43
-19
lines changed

proxy/metrics_monitor.go

Lines changed: 43 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -122,11 +122,18 @@ func (mp *metricsMonitor) wrapHandler(
122122
}
123123
} else {
124124
if gjson.ValidBytes(body) {
125-
if tm, err := parseMetrics(modelID, recorder.StartTime(), gjson.ParseBytes(body)); err != nil {
126-
mp.logger.Warnf("error parsing metrics: %v, path=%s", err, request.URL.Path)
127-
} else {
128-
mp.addMetrics(tm)
125+
parsed := gjson.ParseBytes(body)
126+
usage := parsed.Get("usage")
127+
timings := parsed.Get("timings")
128+
129+
if usage.Exists() || timings.Exists() {
130+
if tm, err := parseMetrics(modelID, recorder.StartTime(), usage, timings); err != nil {
131+
mp.logger.Warnf("error parsing metrics: %v, path=%s", err, request.URL.Path)
132+
} else {
133+
mp.addMetrics(tm)
134+
}
129135
}
136+
130137
} else {
131138
mp.logger.Warnf("metrics skipped, invalid JSON in response body path=%s", request.URL.Path)
132139
}
@@ -174,19 +181,20 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
174181
}
175182

176183
if gjson.ValidBytes(data) {
177-
return parseMetrics(modelID, start, gjson.ParseBytes(data))
184+
parsed := gjson.ParseBytes(data)
185+
usage := parsed.Get("usage")
186+
timings := parsed.Get("timings")
187+
188+
if usage.Exists() || timings.Exists() {
189+
return parseMetrics(modelID, start, usage, timings)
190+
}
178191
}
179192
}
180193

181194
return TokenMetrics{}, fmt.Errorf("no valid JSON data found in stream")
182195
}
183196

184-
func parseMetrics(modelID string, start time.Time, jsonData gjson.Result) (TokenMetrics, error) {
185-
usage := jsonData.Get("usage")
186-
timings := jsonData.Get("timings")
187-
if !usage.Exists() && !timings.Exists() {
188-
return TokenMetrics{}, fmt.Errorf("no usage or timings data found")
189-
}
197+
func parseMetrics(modelID string, start time.Time, usage, timings gjson.Result) (TokenMetrics, error) {
190198
// default values
191199
cachedTokens := -1 // unknown or missing data
192200
outputTokens := 0
@@ -198,19 +206,35 @@ func parseMetrics(modelID string, start time.Time, jsonData gjson.Result) (Token
198206
durationMs := int(time.Since(start).Milliseconds())
199207

200208
if usage.Exists() {
201-
outputTokens = int(jsonData.Get("usage.completion_tokens").Int())
202-
inputTokens = int(jsonData.Get("usage.prompt_tokens").Int())
209+
if pt := usage.Get("prompt_tokens"); pt.Exists() {
210+
// v1/chat/completions
211+
inputTokens = int(pt.Int())
212+
} else if it := usage.Get("input_tokens"); it.Exists() {
213+
// v1/messages
214+
inputTokens = int(it.Int())
215+
}
216+
217+
if ct := usage.Get("completion_tokens"); ct.Exists() {
218+
// v1/chat/completions
219+
outputTokens = int(ct.Int())
220+
} else if ot := usage.Get("output_tokens"); ot.Exists() {
221+
outputTokens = int(ot.Int())
222+
}
223+
224+
if ct := usage.Get("cache_read_input_tokens"); ct.Exists() {
225+
cachedTokens = int(ct.Int())
226+
}
203227
}
204228

205229
// use llama-server's timing data for tok/sec and duration as it is more accurate
206230
if timings.Exists() {
207-
inputTokens = int(jsonData.Get("timings.prompt_n").Int())
208-
outputTokens = int(jsonData.Get("timings.predicted_n").Int())
209-
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
210-
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
211-
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
231+
inputTokens = int(timings.Get("prompt_n").Int())
232+
outputTokens = int(timings.Get("predicted_n").Int())
233+
promptPerSecond = timings.Get("prompt_per_second").Float()
234+
tokensPerSecond = timings.Get("predicted_per_second").Float()
235+
durationMs = int(timings.Get("prompt_ms").Float() + timings.Get("predicted_ms").Float())
212236

213-
if cachedValue := jsonData.Get("timings.cache_n"); cachedValue.Exists() {
237+
if cachedValue := timings.Get("cache_n"); cachedValue.Exists() {
214238
cachedTokens = int(cachedValue.Int())
215239
}
216240
}

0 commit comments

Comments
 (0)