@@ -122,11 +122,18 @@ func (mp *metricsMonitor) wrapHandler(
122122 }
123123 } else {
124124 if gjson .ValidBytes (body ) {
125- if tm , err := parseMetrics (modelID , recorder .StartTime (), gjson .ParseBytes (body )); err != nil {
126- mp .logger .Warnf ("error parsing metrics: %v, path=%s" , err , request .URL .Path )
127- } else {
128- mp .addMetrics (tm )
125+ parsed := gjson .ParseBytes (body )
126+ usage := parsed .Get ("usage" )
127+ timings := parsed .Get ("timings" )
128+
129+ if usage .Exists () || timings .Exists () {
130+ if tm , err := parseMetrics (modelID , recorder .StartTime (), usage , timings ); err != nil {
131+ mp .logger .Warnf ("error parsing metrics: %v, path=%s" , err , request .URL .Path )
132+ } else {
133+ mp .addMetrics (tm )
134+ }
129135 }
136+
130137 } else {
131138 mp .logger .Warnf ("metrics skipped, invalid JSON in response body path=%s" , request .URL .Path )
132139 }
@@ -174,19 +181,20 @@ func processStreamingResponse(modelID string, start time.Time, body []byte) (Tok
174181 }
175182
176183 if gjson .ValidBytes (data ) {
177- return parseMetrics (modelID , start , gjson .ParseBytes (data ))
184+ parsed := gjson .ParseBytes (data )
185+ usage := parsed .Get ("usage" )
186+ timings := parsed .Get ("timings" )
187+
188+ if usage .Exists () || timings .Exists () {
189+ return parseMetrics (modelID , start , usage , timings )
190+ }
178191 }
179192 }
180193
181194 return TokenMetrics {}, fmt .Errorf ("no valid JSON data found in stream" )
182195}
183196
184- func parseMetrics (modelID string , start time.Time , jsonData gjson.Result ) (TokenMetrics , error ) {
185- usage := jsonData .Get ("usage" )
186- timings := jsonData .Get ("timings" )
187- if ! usage .Exists () && ! timings .Exists () {
188- return TokenMetrics {}, fmt .Errorf ("no usage or timings data found" )
189- }
197+ func parseMetrics (modelID string , start time.Time , usage , timings gjson.Result ) (TokenMetrics , error ) {
190198 // default values
191199 cachedTokens := - 1 // unknown or missing data
192200 outputTokens := 0
@@ -198,19 +206,35 @@ func parseMetrics(modelID string, start time.Time, jsonData gjson.Result) (Token
198206 durationMs := int (time .Since (start ).Milliseconds ())
199207
200208 if usage .Exists () {
201- outputTokens = int (jsonData .Get ("usage.completion_tokens" ).Int ())
202- inputTokens = int (jsonData .Get ("usage.prompt_tokens" ).Int ())
209+ if pt := usage .Get ("prompt_tokens" ); pt .Exists () {
210+ // v1/chat/completions
211+ inputTokens = int (pt .Int ())
212+ } else if it := usage .Get ("input_tokens" ); it .Exists () {
213+ // v1/messages
214+ inputTokens = int (it .Int ())
215+ }
216+
217+ if ct := usage .Get ("completion_tokens" ); ct .Exists () {
218+ // v1/chat/completions
219+ outputTokens = int (ct .Int ())
220+ } else if ot := usage .Get ("output_tokens" ); ot .Exists () {
221+ outputTokens = int (ot .Int ())
222+ }
223+
224+ if ct := usage .Get ("cache_read_input_tokens" ); ct .Exists () {
225+ cachedTokens = int (ct .Int ())
226+ }
203227 }
204228
205229 // use llama-server's timing data for tok/sec and duration as it is more accurate
206230 if timings .Exists () {
207- inputTokens = int (jsonData .Get ("timings. prompt_n" ).Int ())
208- outputTokens = int (jsonData .Get ("timings. predicted_n" ).Int ())
209- promptPerSecond = jsonData .Get ("timings. prompt_per_second" ).Float ()
210- tokensPerSecond = jsonData .Get ("timings. predicted_per_second" ).Float ()
211- durationMs = int (jsonData .Get ("timings. prompt_ms" ).Float () + jsonData .Get ("timings. predicted_ms" ).Float ())
231+ inputTokens = int (timings .Get ("prompt_n" ).Int ())
232+ outputTokens = int (timings .Get ("predicted_n" ).Int ())
233+ promptPerSecond = timings .Get ("prompt_per_second" ).Float ()
234+ tokensPerSecond = timings .Get ("predicted_per_second" ).Float ()
235+ durationMs = int (timings .Get ("prompt_ms" ).Float () + timings .Get ("predicted_ms" ).Float ())
212236
213- if cachedValue := jsonData .Get ("timings. cache_n" ); cachedValue .Exists () {
237+ if cachedValue := timings .Get ("cache_n" ); cachedValue .Exists () {
214238 cachedTokens = int (cachedValue .Int ())
215239 }
216240 }
0 commit comments