diff --git a/src/api/providers/__tests__/deepseek.test.ts b/src/api/providers/__tests__/deepseek.test.ts
index 483552efe0f..eb00bf6d65d 100644
--- a/src/api/providers/__tests__/deepseek.test.ts
+++ b/src/api/providers/__tests__/deepseek.test.ts
@@ -26,6 +26,10 @@ jest.mock("openai", () => {
 						prompt_tokens: 10,
 						completion_tokens: 5,
 						total_tokens: 15,
+						prompt_tokens_details: {
+							cache_miss_tokens: 8,
+							cached_tokens: 2,
+						},
 					},
 				}
 			}
@@ -53,6 +57,10 @@ jest.mock("openai", () => {
 						prompt_tokens: 10,
 						completion_tokens: 5,
 						total_tokens: 15,
+						prompt_tokens_details: {
+							cache_miss_tokens: 8,
+							cached_tokens: 2,
+						},
 					},
 				}
 			},
@@ -149,7 +157,7 @@ describe("DeepSeekHandler", () => {
 			expect(model.info.maxTokens).toBe(8192)
 			expect(model.info.contextWindow).toBe(64_000)
 			expect(model.info.supportsImages).toBe(false)
-			expect(model.info.supportsPromptCache).toBe(false)
+			expect(model.info.supportsPromptCache).toBe(true) // Should be true now
 		})
 
 		it("should return provided model ID with default model info if model does not exist", () => {
@@ -160,7 +168,12 @@ describe("DeepSeekHandler", () => {
 			const model = handlerWithInvalidModel.getModel()
 			expect(model.id).toBe("invalid-model") // Returns provided ID
 			expect(model.info).toBeDefined()
-			expect(model.info).toBe(handler.getModel().info) // But uses default model info
+			// With the current implementation, it's the same object reference when using default model info
+			expect(model.info).toBe(handler.getModel().info)
+			// Should have the same base properties
+			expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow)
+			// And should have supportsPromptCache set to true
+			expect(model.info.supportsPromptCache).toBe(true)
 		})
 
 		it("should return default model if no model ID is provided", () => {
@@ -171,6 +184,13 @@ describe("DeepSeekHandler", () => {
 			const model = handlerWithoutModel.getModel()
 			expect(model.id).toBe(deepSeekDefaultModelId)
 			expect(model.info).toBeDefined()
+			expect(model.info.supportsPromptCache).toBe(true)
+		})
+
+		it("should include model parameters from getModelParams", () => {
+			const model = handler.getModel()
+			expect(model).toHaveProperty("temperature")
+			expect(model).toHaveProperty("maxTokens")
 		})
 	})
 
@@ -213,5 +233,74 @@ describe("DeepSeekHandler", () => {
 			expect(usageChunks[0].inputTokens).toBe(10)
 			expect(usageChunks[0].outputTokens).toBe(5)
 		})
+
+		it("should include cache metrics in usage information", async () => {
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+			expect(usageChunks.length).toBeGreaterThan(0)
+			expect(usageChunks[0].cacheWriteTokens).toBe(8)
+			expect(usageChunks[0].cacheReadTokens).toBe(2)
+		})
+	})
+
+	describe("processUsageMetrics", () => {
+		it("should correctly process usage metrics including cache information", () => {
+			// We need to access the protected method, so we'll create a test subclass
+			class TestDeepSeekHandler extends DeepSeekHandler {
+				public testProcessUsageMetrics(usage: any) {
+					return this.processUsageMetrics(usage)
+				}
+			}
+
+			const testHandler = new TestDeepSeekHandler(mockOptions)
+
+			const usage = {
+				prompt_tokens: 100,
+				completion_tokens: 50,
+				total_tokens: 150,
+				prompt_tokens_details: {
+					cache_miss_tokens: 80,
+					cached_tokens: 20,
+				},
+			}
+
+			const result = testHandler.testProcessUsageMetrics(usage)
+
+			expect(result.type).toBe("usage")
+			expect(result.inputTokens).toBe(100)
+			expect(result.outputTokens).toBe(50)
+			expect(result.cacheWriteTokens).toBe(80)
+			expect(result.cacheReadTokens).toBe(20)
+		})
+
+		it("should handle missing cache metrics gracefully", () => {
+			class TestDeepSeekHandler extends DeepSeekHandler {
+				public testProcessUsageMetrics(usage: any) {
+					return this.processUsageMetrics(usage)
+				}
+			}
+
+			const testHandler = new TestDeepSeekHandler(mockOptions)
+
+			const usage = {
+				prompt_tokens: 100,
+				completion_tokens: 50,
+				total_tokens: 150,
+				// No prompt_tokens_details
+			}
+
+			const result = testHandler.testProcessUsageMetrics(usage)
+
+			expect(result.type).toBe("usage")
+			expect(result.inputTokens).toBe(100)
+			expect(result.outputTokens).toBe(50)
+			expect(result.cacheWriteTokens).toBeUndefined()
+			expect(result.cacheReadTokens).toBeUndefined()
+		})
 	})
 })
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
index 9ba19aa5db8..2c12637d947 100644
--- a/src/api/providers/deepseek.ts
+++ b/src/api/providers/deepseek.ts
@@ -1,6 +1,7 @@
 import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
-import { ModelInfo } from "../../shared/api"
-import { deepSeekModels, deepSeekDefaultModelId } from "../../shared/api"
+import { deepSeekModels, deepSeekDefaultModelId, ModelInfo } from "../../shared/api"
+import { ApiStreamUsageChunk } from "../transform/stream" // Import for type
+import { getModelParams } from "../index"
 
 export class DeepSeekHandler extends OpenAiHandler {
 	constructor(options: OpenAiHandlerOptions) {
@@ -16,9 +17,23 @@ export class DeepSeekHandler extends OpenAiHandler {
 
 	override getModel(): { id: string; info: ModelInfo } {
 		const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
+		const info = deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
+
 		return {
 			id: modelId,
-			info: deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId],
+			info,
+			...getModelParams({ options: this.options, model: info }),
+		}
+	}
+
+	// Override to handle DeepSeek's usage metrics, including caching.
+	protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
+		return {
+			type: "usage",
+			inputTokens: usage?.prompt_tokens || 0,
+			outputTokens: usage?.completion_tokens || 0,
+			cacheWriteTokens: usage?.prompt_tokens_details?.cache_miss_tokens,
+			cacheReadTokens: usage?.prompt_tokens_details?.cached_tokens,
 		}
 	}
 }
diff --git a/src/shared/api.ts b/src/shared/api.ts
index d066039850b..41d74427116 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -814,19 +814,23 @@ export const deepSeekModels = {
 		maxTokens: 8192,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 0.014, // $0.014 per million tokens
-		outputPrice: 0.28, // $0.28 per million tokens
+		supportsPromptCache: true,
+		inputPrice: 0.27, // $0.27 per million tokens (cache miss)
+		outputPrice: 1.1, // $1.10 per million tokens
+		cacheWritesPrice: 0.27, // $0.27 per million tokens (cache miss)
+		cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit)
 		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models.
 It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
 	},
 	"deepseek-reasoner": {
 		maxTokens: 8192,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 0.55, // $0.55 per million tokens
+		supportsPromptCache: true,
+		inputPrice: 0.55, // $0.55 per million tokens (cache miss)
 		outputPrice: 2.19, // $2.19 per million tokens
-		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`,
+		cacheWritesPrice: 0.55, // $0.55 per million tokens (cache miss)
+		cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit)
+		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 32K tokens.`,
 	},
 } as const satisfies Record