@@ -114,6 +114,93 @@ data: {"type":"message_stop"}`;
     expect(usageChunk.usage.prompt_tokens).toBe(1000);
     expect(usageChunk.usage.total_tokens).toBe(1000 + 50);
   });
+
+  it("should handle tool call streaming where message_delta only has output_tokens", () => {
+    const converter = new AnthropicToOpenAIStreamConverter();
+
+    // From Anthropic docs: tool call streaming - message_delta only has output_tokens
+    const nativeResponse = `event: message_start
+data: {"type":"message_start","message":{"id":"msg_014p7gG3wDgGV9EUtLvnow3U","type":"message","role":"assistant","model":"claude-opus-4-6","stop_sequence":null,"usage":{"input_tokens":472,"output_tokens":2},"content":[],"stop_reason":null}}
+
+event: content_block_start
+data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\\"location\\": \\"San Francisco, CA\\"}"}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":0}
+
+event: message_delta
+data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":89}}
+
+event: message_stop
+data: {"type":"message_stop"}`;
+
+    const chunks: any[] = [];
+    converter.processLines(nativeResponse, (chunk) => {
+      chunks.push(chunk);
+    });
+
+    const usageChunk = chunks.find((c) => c.usage);
+    expect(usageChunk).toBeDefined();
+
+    // prompt_tokens should come from message_start input_tokens (472), NOT be 0 or 1
+    expect(usageChunk.usage.prompt_tokens).toBe(472);
+    expect(usageChunk.usage.completion_tokens).toBe(89);
+  });
+
+  it("should handle extended thinking streaming where message_start has no usage", () => {
+    const converter = new AnthropicToOpenAIStreamConverter();
+
+    // From Anthropic docs: extended thinking - message_start has no usage field
+    const nativeResponse = `event: message_start
+data: {"type":"message_start","message":{"id":"msg_01","type":"message","role":"assistant","content":[],"model":"claude-opus-4-6","stop_reason":null,"stop_sequence":null}}
+
+event: content_block_start
+data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"Let me think..."}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"EqQBCg..."}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":0}
+
+event: content_block_start
+data: {"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}}
+
+event: content_block_delta
+data: {"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"The answer is 21."}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":1}
+
+event: message_delta
+data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null}}
+
+event: message_stop
+data: {"type":"message_stop"}`;
+
+    const chunks: any[] = [];
+    converter.processLines(nativeResponse, (chunk) => {
+      chunks.push(chunk);
+    });
+
+    // Should not crash and should produce chunks
+    expect(chunks.length).toBeGreaterThan(0);
+
+    // Usage chunk should have 0 tokens (no usage data available)
+    const usageChunk = chunks.find((c) => c.usage);
+    expect(usageChunk).toBeDefined();
+    expect(usageChunk.usage.prompt_tokens).toBe(0);
+    expect(usageChunk.usage.completion_tokens).toBe(0);
+  });
 });
 
 describe("toOpenAI (non-streaming) - Cache Token Handling", () => {
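The two fixtures above pin down the usage contract that the converter hunks below implement: prompt tokens come from message_start unless message_delta supplies a final input_tokens, completion tokens come from message_delta and default to 0, and a completely absent usage block must not crash the stream. As a reviewer's summary only, here is a minimal standalone sketch of that merge rule; it is not the converter's actual internals, and the AnthropicUsage/mergeUsage names are illustrative:

interface AnthropicUsage {
  input_tokens?: number;
  output_tokens?: number;
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
}

// Fallback chain mirrored from the diff below: message_delta wins (its
// counts are cumulative/final), message_start is the fallback, 0 the default.
function mergeUsage(
  start?: AnthropicUsage,
  delta?: AnthropicUsage,
): { prompt_tokens: number; completion_tokens: number; total_tokens: number } {
  const promptTokens = delta?.input_tokens ?? start?.input_tokens ?? 0;
  const completionTokens = delta?.output_tokens ?? 0;
  const cached =
    delta?.cache_read_input_tokens ?? start?.cache_read_input_tokens ?? 0;
  const cacheWrite =
    delta?.cache_creation_input_tokens ?? start?.cache_creation_input_tokens ?? 0;
  return {
    prompt_tokens: promptTokens,
    completion_tokens: completionTokens,
    total_tokens: promptTokens + completionTokens + cached + cacheWrite,
  };
}

Against the fixtures: mergeUsage({ input_tokens: 472 }, { output_tokens: 89 }) yields { prompt_tokens: 472, completion_tokens: 89, total_tokens: 561 }, matching the tool-call test, while mergeUsage(undefined, undefined) yields all zeros, matching the extended-thinking test. The hunks below apply this to the converter implementation itself.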
@@ -77,12 +77,13 @@ export class AnthropicToOpenAIStreamConverter {
       case "message_start":
         this.messageId = event.message.id;
         this.model = event.message.model;
-        this.inputTokens = event.message.usage.input_tokens ?? 0;
+        // usage may be missing (e.g. extended thinking streams on Claude 4.6)
+        this.inputTokens = event.message.usage?.input_tokens ?? 0;
         this.cacheReadInputTokens =
-          event.message.usage.cache_read_input_tokens ?? 0;
+          event.message.usage?.cache_read_input_tokens ?? 0;
         this.cacheCreationInputTokens =
-          event.message.usage.cache_creation_input_tokens ?? 0;
-        this.cacheCreationDetails = event.message.usage.cache_creation
+          event.message.usage?.cache_creation_input_tokens ?? 0;
+        this.cacheCreationDetails = event.message.usage?.cache_creation
           ? {
               ephemeral_5m_input_tokens:
                 event.message.usage.cache_creation.ephemeral_5m_input_tokens ??
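The guards added here matter because message_start arrives in two shapes in the fixtures above: with a usage block (tool call) and without one (extended thinking). Roughly, with field names taken from the fixtures and an illustrative type name:

// message_start as exercised by the tests: usage, and everything inside it,
// is optional, hence the ?. guards in the hunk above.
interface MessageStartEvent {
  type: "message_start";
  message: {
    id: string;
    model: string;
    usage?: {
      input_tokens?: number;
      output_tokens?: number;
      cache_read_input_tokens?: number;
      cache_creation_input_tokens?: number;
      cache_creation?: { ephemeral_5m_input_tokens?: number };
    };
  };
}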
@@ -303,20 +304,26 @@ export class AnthropicToOpenAIStreamConverter {
         this.finalizePendingToolCalls(chunks);
 
         // Cache tokens may come from message_start (stored in instance vars) or message_delta
+        // usage may be missing (e.g. extended thinking streams on Claude 4.6)
         const cachedTokens =
-          event.usage.cache_read_input_tokens ?? this.cacheReadInputTokens;
+          event.usage?.cache_read_input_tokens ?? this.cacheReadInputTokens;
         const cacheWriteTokens =
-          event.usage.cache_creation_input_tokens ??
+          event.usage?.cache_creation_input_tokens ??
           this.cacheCreationInputTokens;
         const webSearchRequests =
-          event.usage.server_tool_use?.web_search_requests ?? 0;
+          event.usage?.server_tool_use?.web_search_requests ?? 0;
 
+        // Prefer message_delta input_tokens (cumulative/final) over message_start value
+        const finalInputTokens =
+          event.usage?.input_tokens ?? this.inputTokens;
+        const finalOutputTokens = event.usage?.output_tokens ?? 0;
+
         this.finalUsage = {
-          prompt_tokens: event.usage.input_tokens ?? this.inputTokens,
-          completion_tokens: event.usage.output_tokens,
+          prompt_tokens: finalInputTokens,
+          completion_tokens: finalOutputTokens,
           total_tokens:
-            (event.usage.input_tokens ?? this.inputTokens) +
-            event.usage.output_tokens +
+            finalInputTokens +
+            finalOutputTokens +
             (cachedTokens ?? 0) +
             (cacheWriteTokens ?? 0),
           ...((cachedTokens > 0 || cacheWriteTokens > 0) && {
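For reviewers who want to poke at the behavior outside the test runner, a throwaway driver along these lines reproduces the fixtures; the import path is a placeholder for wherever AnthropicToOpenAIStreamConverter is actually exported from:

import { AnthropicToOpenAIStreamConverter } from "./anthropic-to-openai"; // placeholder path

const sse = `event: message_start
data: {"type":"message_start","message":{"id":"msg_01","type":"message","role":"assistant","model":"claude-opus-4-6","usage":{"input_tokens":472,"output_tokens":2},"content":[],"stop_reason":null,"stop_sequence":null}}

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":89}}

event: message_stop
data: {"type":"message_stop"}`;

const converter = new AnthropicToOpenAIStreamConverter();
converter.processLines(sse, (chunk) => {
  // With this fix the final usage chunk reports prompt_tokens 472 and
  // completion_tokens 89 instead of zeros or a crash.
  if (chunk.usage) console.log(chunk.usage);
});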