@@ -1093,9 +1093,9 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
10931093 Cost : messageCost ,
10941094 Model : messageModel ,
10951095 }
1096- }
1097- if res .RateLimit != nil {
1098- msgUsage . RateLimit = * res . RateLimit
1096+ if res . RateLimit != nil {
1097+ msgUsage .RateLimit = * res . RateLimit
1098+ }
10991099 }
11001100
11011101 addAgentMessage (sess , a , & assistantMessage , events )
@@ -1270,6 +1270,7 @@ func (r *LocalRuntime) handleStream(ctx context.Context, stream chat.MessageStre
12701270 var actualModelEventEmitted bool
12711271 var messageUsage * chat.Usage
12721272 var messageRateLimit * chat.RateLimit
1273+ var prevStreamCost float64 // cost contributed by previous usage emission in this stream
12731274
12741275 modelID := getAgentModelID (a )
12751276 toolCallIndex := make (map [string ]int ) // toolCallID -> index in toolCalls slice
@@ -1292,11 +1293,12 @@ func (r *LocalRuntime) handleStream(ctx context.Context, stream chat.MessageStre
12921293 messageUsage = response .Usage
12931294
12941295 if m != nil && m .Cost != nil {
1295- cost := float64 (response .Usage .InputTokens )* m .Cost .Input +
1296+ streamCost := ( float64 (response .Usage .InputTokens )* m .Cost .Input +
12961297 float64 (response .Usage .OutputTokens )* m .Cost .Output +
12971298 float64 (response .Usage .CachedInputTokens )* m .Cost .CacheRead +
1298- float64 (response .Usage .CacheWriteTokens )* m .Cost .CacheWrite
1299- sess .Cost += cost / 1e6
1299+ float64 (response .Usage .CacheWriteTokens )* m .Cost .CacheWrite ) / 1e6
1300+ sess .Cost += streamCost - prevStreamCost
1301+ prevStreamCost = streamCost
13001302 }
13011303
13021304 sess .InputTokens = response .Usage .InputTokens + response .Usage .CachedInputTokens + response .Usage .CacheWriteTokens
0 commit comments