diff --git a/packages/__tests__/llm-mapper/anthropic-stream-cache-tokens.test.ts b/packages/__tests__/llm-mapper/anthropic-stream-cache-tokens.test.ts
index 6b4071f9be..5a0f926005 100644
--- a/packages/__tests__/llm-mapper/anthropic-stream-cache-tokens.test.ts
+++ b/packages/__tests__/llm-mapper/anthropic-stream-cache-tokens.test.ts
@@ -114,6 +114,93 @@ data: {"type":"message_stop"}`;
     expect(usageChunk.usage.prompt_tokens).toBe(1000);
     expect(usageChunk.usage.total_tokens).toBe(1000 + 50);
   });
+
+  it("should handle tool call streaming where message_delta only has output_tokens", () => {
+    const converter = new AnthropicToOpenAIStreamConverter();
+
+    // From Anthropic docs: tool call streaming - message_delta only has output_tokens
+    const nativeResponse = `event: message_start
+data: {"type":"message_start","message":{"id":"msg_014p7gG3wDgGV9EUtLvnow3U","type":"message","role":"assistant","model":"claude-opus-4-6","stop_sequence":null,"usage":{"input_tokens":472,"output_tokens":2},"content":[],"stop_reason":null}}
+
+event: content_block_start
+data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\\"location\\": \\"San Francisco, CA\\"}"}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":0}
+
+event: message_delta
+data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":89}}
+
+event: message_stop
+data: {"type":"message_stop"}`;
+
+    const chunks: any[] = [];
+    converter.processLines(nativeResponse, (chunk) => {
+      chunks.push(chunk);
+    });
+
+    const usageChunk = chunks.find((c) => c.usage);
+    expect(usageChunk).toBeDefined();
+
+    // prompt_tokens should come from message_start input_tokens (472), NOT be 0 or 1
+    expect(usageChunk.usage.prompt_tokens).toBe(472);
+    expect(usageChunk.usage.completion_tokens).toBe(89);
+  });
+
+  it("should handle extended thinking streaming where message_start has no usage", () => {
+    const converter = new AnthropicToOpenAIStreamConverter();
+
+    // From Anthropic docs: extended thinking - message_start has no usage field
+    const nativeResponse = `event: message_start
+data: {"type":"message_start","message":{"id":"msg_01","type":"message","role":"assistant","content":[],"model":"claude-opus-4-6","stop_reason":null,"stop_sequence":null}}
+
+event: content_block_start
+data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"Let me think..."}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"EqQBCg..."}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":0}
+
+event: content_block_start
+data: {"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"The answer is 21."}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":1}
+
+event: message_delta
+data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null}}
+
+event: message_stop
+data: {"type":"message_stop"}`;
+
+    const chunks: any[] = [];
+    converter.processLines(nativeResponse, (chunk) => {
+      chunks.push(chunk);
+    });
+
+    // Should not crash and should produce chunks
+    expect(chunks.length).toBeGreaterThan(0);
+
+    // Usage chunk should have 0 tokens (no usage data available)
+    const usageChunk = chunks.find((c) => c.usage);
+    expect(usageChunk).toBeDefined();
+    expect(usageChunk.usage.prompt_tokens).toBe(0);
+    expect(usageChunk.usage.completion_tokens).toBe(0);
+  });
 });
 
 describe("toOpenAI (non-streaming) - Cache Token Handling", () => {
diff --git a/packages/llm-mapper/transform/providers/anthropic/streamedResponse/toOpenai.ts b/packages/llm-mapper/transform/providers/anthropic/streamedResponse/toOpenai.ts
index 0e90adfb3b..04657392b2 100644
--- a/packages/llm-mapper/transform/providers/anthropic/streamedResponse/toOpenai.ts
+++ b/packages/llm-mapper/transform/providers/anthropic/streamedResponse/toOpenai.ts
@@ -77,12 +77,13 @@ export class AnthropicToOpenAIStreamConverter {
       case "message_start":
         this.messageId = event.message.id;
         this.model = event.message.model;
-        this.inputTokens = event.message.usage.input_tokens ?? 0;
+        // usage may be missing (e.g. extended thinking streams on Claude 4.6)
+        this.inputTokens = event.message.usage?.input_tokens ?? 0;
         this.cacheReadInputTokens =
-          event.message.usage.cache_read_input_tokens ?? 0;
+          event.message.usage?.cache_read_input_tokens ?? 0;
         this.cacheCreationInputTokens =
-          event.message.usage.cache_creation_input_tokens ?? 0;
-        this.cacheCreationDetails = event.message.usage.cache_creation
+          event.message.usage?.cache_creation_input_tokens ?? 0;
+        this.cacheCreationDetails = event.message.usage?.cache_creation
           ? {
               ephemeral_5m_input_tokens:
                 event.message.usage.cache_creation.ephemeral_5m_input_tokens ??
@@ -303,20 +304,26 @@ export class AnthropicToOpenAIStreamConverter {
         this.finalizePendingToolCalls(chunks);
 
         // Cache tokens may come from message_start (stored in instance vars) or message_delta
+        // usage may be missing (e.g. extended thinking streams on Claude 4.6)
         const cachedTokens =
-          event.usage.cache_read_input_tokens ?? this.cacheReadInputTokens;
+          event.usage?.cache_read_input_tokens ?? this.cacheReadInputTokens;
         const cacheWriteTokens =
-          event.usage.cache_creation_input_tokens ??
+          event.usage?.cache_creation_input_tokens ??
           this.cacheCreationInputTokens;
         const webSearchRequests =
-          event.usage.server_tool_use?.web_search_requests ?? 0;
+          event.usage?.server_tool_use?.web_search_requests ?? 0;
+
+        // Prefer message_delta input_tokens (cumulative/final) over message_start value
+        const finalInputTokens =
+          event.usage?.input_tokens ?? this.inputTokens;
+        const finalOutputTokens = event.usage?.output_tokens ?? 0;
 
         this.finalUsage = {
-          prompt_tokens: event.usage.input_tokens ?? this.inputTokens,
-          completion_tokens: event.usage.output_tokens,
+          prompt_tokens: finalInputTokens,
+          completion_tokens: finalOutputTokens,
           total_tokens:
-            (event.usage.input_tokens ?? this.inputTokens) +
-            event.usage.output_tokens +
+            finalInputTokens +
+            finalOutputTokens +
             (cachedTokens ?? 0) +
             (cacheWriteTokens ?? 0),
           ...((cachedTokens > 0 || cacheWriteTokens > 0) && {