From 1711d6c04d2c91c47f98a6a0f00c3132cbf5c9c9 Mon Sep 17 00:00:00 2001 From: Olwer Altuve Date: Thu, 6 Mar 2025 08:26:03 -0400 Subject: [PATCH 1/4] feat(deepseek): enhance model support with prompt caching and detailed usage metrics - Add support for prompt caching in DeepSeek models - Update model configurations to reflect caching capabilities - Implement detailed usage metrics tracking cache write and read tokens - Extend test coverage for new usage metrics and model parameters - Update pricing information for DeepSeek models to include cache-related costs --- src/api/providers/__tests__/deepseek.test.ts | 93 +++++++++++++++++++- src/api/providers/deepseek.ts | 29 +++++- src/shared/api.ts | 17 ++-- 3 files changed, 128 insertions(+), 11 deletions(-) diff --git a/src/api/providers/__tests__/deepseek.test.ts b/src/api/providers/__tests__/deepseek.test.ts index 483552efe0f..fccad000a74 100644 --- a/src/api/providers/__tests__/deepseek.test.ts +++ b/src/api/providers/__tests__/deepseek.test.ts @@ -26,6 +26,10 @@ jest.mock("openai", () => { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15, + prompt_tokens_details: { + cache_miss_tokens: 8, + cached_tokens: 2, + }, }, } } @@ -53,6 +57,10 @@ jest.mock("openai", () => { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15, + prompt_tokens_details: { + cache_miss_tokens: 8, + cached_tokens: 2, + }, }, } }, @@ -149,7 +157,7 @@ describe("DeepSeekHandler", () => { expect(model.info.maxTokens).toBe(8192) expect(model.info.contextWindow).toBe(64_000) expect(model.info.supportsImages).toBe(false) - expect(model.info.supportsPromptCache).toBe(false) + expect(model.info.supportsPromptCache).toBe(true) // Should be true now }) it("should return provided model ID with default model info if model does not exist", () => { @@ -160,7 +168,12 @@ describe("DeepSeekHandler", () => { const model = handlerWithInvalidModel.getModel() expect(model.id).toBe("invalid-model") // Returns provided ID expect(model.info).toBeDefined() - expect(model.info).toBe(handler.getModel().info) // But uses default model info + // Should not be the same object reference anymore due to the spread and override + expect(model.info).not.toBe(handler.getModel().info) + // But should have the same base properties + expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow) + // And should have supportsPromptCache set to true + expect(model.info.supportsPromptCache).toBe(true) }) it("should return default model if no model ID is provided", () => { @@ -171,6 +184,13 @@ describe("DeepSeekHandler", () => { const model = handlerWithoutModel.getModel() expect(model.id).toBe(deepSeekDefaultModelId) expect(model.info).toBeDefined() + expect(model.info.supportsPromptCache).toBe(true) + }) + + it("should include model parameters from getModelParams", () => { + const model = handler.getModel() + expect(model).toHaveProperty("temperature") + expect(model).toHaveProperty("maxTokens") }) }) @@ -213,5 +233,74 @@ describe("DeepSeekHandler", () => { expect(usageChunks[0].inputTokens).toBe(10) expect(usageChunks[0].outputTokens).toBe(5) }) + + it("should include cache metrics in usage information", async () => { + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunks = chunks.filter((chunk) => chunk.type === "usage") + expect(usageChunks.length).toBeGreaterThan(0) + expect(usageChunks[0].cacheWriteTokens).toBe(8) + 
expect(usageChunks[0].cacheReadTokens).toBe(2) + }) + }) + + describe("processUsageMetrics", () => { + it("should correctly process usage metrics including cache information", () => { + // We need to access the protected method, so we'll create a test subclass + class TestDeepSeekHandler extends DeepSeekHandler { + public testProcessUsageMetrics(usage: any) { + return this.processUsageMetrics(usage) + } + } + + const testHandler = new TestDeepSeekHandler(mockOptions) + + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + prompt_tokens_details: { + cache_miss_tokens: 80, + cached_tokens: 20, + }, + } + + const result = testHandler.testProcessUsageMetrics(usage) + + expect(result.type).toBe("usage") + expect(result.inputTokens).toBe(100) + expect(result.outputTokens).toBe(50) + expect(result.cacheWriteTokens).toBe(80) + expect(result.cacheReadTokens).toBe(20) + }) + + it("should handle missing cache metrics gracefully", () => { + class TestDeepSeekHandler extends DeepSeekHandler { + public testProcessUsageMetrics(usage: any) { + return this.processUsageMetrics(usage) + } + } + + const testHandler = new TestDeepSeekHandler(mockOptions) + + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + // No prompt_tokens_details + } + + const result = testHandler.testProcessUsageMetrics(usage) + + expect(result.type).toBe("usage") + expect(result.inputTokens).toBe(100) + expect(result.outputTokens).toBe(50) + expect(result.cacheWriteTokens).toBeUndefined() + expect(result.cacheReadTokens).toBeUndefined() + }) }) }) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 9ba19aa5db8..a9d3f236bcf 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -1,6 +1,8 @@ import { OpenAiHandler, OpenAiHandlerOptions } from "./openai" -import { ModelInfo } from "../../shared/api" -import { deepSeekModels, deepSeekDefaultModelId } from "../../shared/api" +import { deepSeekModels, deepSeekDefaultModelId, ModelInfo } from "../../shared/api" +import { ApiStreamUsageChunk } from "../transform/stream" // Import for type +import { getModelParams } from "../index" +import OpenAI from "openai" export class DeepSeekHandler extends OpenAiHandler { constructor(options: OpenAiHandlerOptions) { @@ -16,9 +18,30 @@ export class DeepSeekHandler extends OpenAiHandler { override getModel(): { id: string; info: ModelInfo } { const modelId = this.options.apiModelId ?? deepSeekDefaultModelId + const originalInfo = + deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId] + + // Apply defaults with proper typing + const info: ModelInfo = { + ...originalInfo, + supportsPromptCache: true, // DeepSeek *does* support prompt caching + } + return { id: modelId, - info: deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId], + info, + ...getModelParams({ options: this.options, model: info }), + } + } + + // Override to handle DeepSeek's usage metrics, including caching. 
+ protected override processUsageMetrics(usage: any): ApiStreamUsageChunk { + return { + type: "usage", + inputTokens: usage?.prompt_tokens || 0, + outputTokens: usage?.completion_tokens || 0, + cacheWriteTokens: usage?.prompt_tokens_details?.cache_miss_tokens, + cacheReadTokens: usage?.prompt_tokens_details?.cached_tokens, } } } diff --git a/src/shared/api.ts b/src/shared/api.ts index d066039850b..48eb105b13c 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -814,19 +814,24 @@ export const deepSeekModels = { maxTokens: 8192, contextWindow: 64_000, supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.014, // $0.014 per million tokens - outputPrice: 0.28, // $0.28 per million tokens + supportsPromptCache: true, + inputPrice: 0.27, // $0.27 per million tokens (cache miss) + outputPrice: 1.1, // $1.10 per million tokens + cacheWritesPrice: 0.27, // $0.27 per million tokens (cache miss) + cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit) description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`, }, "deepseek-reasoner": { maxTokens: 8192, contextWindow: 64_000, supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.55, // $0.55 per million tokens + supportsPromptCache: true, + inputPrice: 0.55, // $0.55 per million tokens (cache miss) outputPrice: 2.19, // $2.19 per million tokens - description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`, + cacheWritesPrice: 0.55, // $0.55 per million tokens (cache miss) + cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit) + thinking: true, // Supports Chain of Thought with 32K tokens + description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. 
Supports Chain of Thought reasoning with up to 32K tokens.`, }, } as const satisfies Record From 89cf2c4c58dc973f44ab53c3f2997d4f5a31b421 Mon Sep 17 00:00:00 2001 From: Olwer Altuve Date: Thu, 6 Mar 2025 11:26:15 -0400 Subject: [PATCH 2/4] feat(api): Add DeepSeek provider support for prompt caching and detailed usage metrics - Update DeepSeekHandler to support prompt caching - Add cache token tracking in usage metrics - Update DeepSeek model configurations with cache-related pricing - Enhance test coverage for cache and usage metric handling --- src/api/providers/__tests__/deepseek.test.ts | 93 +++++++++++++++++++- src/api/providers/deepseek.ts | 29 +++++- src/shared/api.ts | 16 ++-- 3 files changed, 127 insertions(+), 11 deletions(-) diff --git a/src/api/providers/__tests__/deepseek.test.ts b/src/api/providers/__tests__/deepseek.test.ts index 483552efe0f..fccad000a74 100644 --- a/src/api/providers/__tests__/deepseek.test.ts +++ b/src/api/providers/__tests__/deepseek.test.ts @@ -26,6 +26,10 @@ jest.mock("openai", () => { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15, + prompt_tokens_details: { + cache_miss_tokens: 8, + cached_tokens: 2, + }, }, } } @@ -53,6 +57,10 @@ jest.mock("openai", () => { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15, + prompt_tokens_details: { + cache_miss_tokens: 8, + cached_tokens: 2, + }, }, } }, @@ -149,7 +157,7 @@ describe("DeepSeekHandler", () => { expect(model.info.maxTokens).toBe(8192) expect(model.info.contextWindow).toBe(64_000) expect(model.info.supportsImages).toBe(false) - expect(model.info.supportsPromptCache).toBe(false) + expect(model.info.supportsPromptCache).toBe(true) // Should be true now }) it("should return provided model ID with default model info if model does not exist", () => { @@ -160,7 +168,12 @@ describe("DeepSeekHandler", () => { const model = handlerWithInvalidModel.getModel() expect(model.id).toBe("invalid-model") // Returns provided ID expect(model.info).toBeDefined() - expect(model.info).toBe(handler.getModel().info) // But uses default model info + // Should not be the same object reference anymore due to the spread and override + expect(model.info).not.toBe(handler.getModel().info) + // But should have the same base properties + expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow) + // And should have supportsPromptCache set to true + expect(model.info.supportsPromptCache).toBe(true) }) it("should return default model if no model ID is provided", () => { @@ -171,6 +184,13 @@ describe("DeepSeekHandler", () => { const model = handlerWithoutModel.getModel() expect(model.id).toBe(deepSeekDefaultModelId) expect(model.info).toBeDefined() + expect(model.info.supportsPromptCache).toBe(true) + }) + + it("should include model parameters from getModelParams", () => { + const model = handler.getModel() + expect(model).toHaveProperty("temperature") + expect(model).toHaveProperty("maxTokens") }) }) @@ -213,5 +233,74 @@ describe("DeepSeekHandler", () => { expect(usageChunks[0].inputTokens).toBe(10) expect(usageChunks[0].outputTokens).toBe(5) }) + + it("should include cache metrics in usage information", async () => { + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const usageChunks = chunks.filter((chunk) => chunk.type === "usage") + expect(usageChunks.length).toBeGreaterThan(0) + expect(usageChunks[0].cacheWriteTokens).toBe(8) + expect(usageChunks[0].cacheReadTokens).toBe(2) + }) + }) + + 
describe("processUsageMetrics", () => { + it("should correctly process usage metrics including cache information", () => { + // We need to access the protected method, so we'll create a test subclass + class TestDeepSeekHandler extends DeepSeekHandler { + public testProcessUsageMetrics(usage: any) { + return this.processUsageMetrics(usage) + } + } + + const testHandler = new TestDeepSeekHandler(mockOptions) + + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + prompt_tokens_details: { + cache_miss_tokens: 80, + cached_tokens: 20, + }, + } + + const result = testHandler.testProcessUsageMetrics(usage) + + expect(result.type).toBe("usage") + expect(result.inputTokens).toBe(100) + expect(result.outputTokens).toBe(50) + expect(result.cacheWriteTokens).toBe(80) + expect(result.cacheReadTokens).toBe(20) + }) + + it("should handle missing cache metrics gracefully", () => { + class TestDeepSeekHandler extends DeepSeekHandler { + public testProcessUsageMetrics(usage: any) { + return this.processUsageMetrics(usage) + } + } + + const testHandler = new TestDeepSeekHandler(mockOptions) + + const usage = { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + // No prompt_tokens_details + } + + const result = testHandler.testProcessUsageMetrics(usage) + + expect(result.type).toBe("usage") + expect(result.inputTokens).toBe(100) + expect(result.outputTokens).toBe(50) + expect(result.cacheWriteTokens).toBeUndefined() + expect(result.cacheReadTokens).toBeUndefined() + }) }) }) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 9ba19aa5db8..a9d3f236bcf 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -1,6 +1,8 @@ import { OpenAiHandler, OpenAiHandlerOptions } from "./openai" -import { ModelInfo } from "../../shared/api" -import { deepSeekModels, deepSeekDefaultModelId } from "../../shared/api" +import { deepSeekModels, deepSeekDefaultModelId, ModelInfo } from "../../shared/api" +import { ApiStreamUsageChunk } from "../transform/stream" // Import for type +import { getModelParams } from "../index" +import OpenAI from "openai" export class DeepSeekHandler extends OpenAiHandler { constructor(options: OpenAiHandlerOptions) { @@ -16,9 +18,30 @@ export class DeepSeekHandler extends OpenAiHandler { override getModel(): { id: string; info: ModelInfo } { const modelId = this.options.apiModelId ?? deepSeekDefaultModelId + const originalInfo = + deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId] + + // Apply defaults with proper typing + const info: ModelInfo = { + ...originalInfo, + supportsPromptCache: true, // DeepSeek *does* support prompt caching + } + return { id: modelId, - info: deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId], + info, + ...getModelParams({ options: this.options, model: info }), + } + } + + // Override to handle DeepSeek's usage metrics, including caching. 
+ protected override processUsageMetrics(usage: any): ApiStreamUsageChunk { + return { + type: "usage", + inputTokens: usage?.prompt_tokens || 0, + outputTokens: usage?.completion_tokens || 0, + cacheWriteTokens: usage?.prompt_tokens_details?.cache_miss_tokens, + cacheReadTokens: usage?.prompt_tokens_details?.cached_tokens, } } } diff --git a/src/shared/api.ts b/src/shared/api.ts index d066039850b..b28b139c690 100644 --- a/src/shared/api.ts +++ b/src/shared/api.ts @@ -814,19 +814,23 @@ export const deepSeekModels = { maxTokens: 8192, contextWindow: 64_000, supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.014, // $0.014 per million tokens - outputPrice: 0.28, // $0.28 per million tokens + supportsPromptCache: true, + inputPrice: 0.27, // $0.27 per million tokens (cache miss) + outputPrice: 1.1, // $1.10 per million tokens + cacheWritesPrice: 0.27, // $0.27 per million tokens (cache miss) + cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit) description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`, }, "deepseek-reasoner": { maxTokens: 8192, contextWindow: 64_000, supportsImages: false, - supportsPromptCache: false, - inputPrice: 0.55, // $0.55 per million tokens + supportsPromptCache: true, + inputPrice: 0.55, // $0.55 per million tokens (cache miss) outputPrice: 2.19, // $2.19 per million tokens - description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`, + cacheWritesPrice: 0.55, // $0.55 per million tokens (cache miss) + cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit) + description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 32K tokens.`, }, } as const satisfies Record From e75fdc1ef6e47e03ebd12baad80363aa82173e63 Mon Sep 17 00:00:00 2001 From: Olwer Altuve Date: Thu, 6 Mar 2025 11:51:11 -0400 Subject: [PATCH 3/4] only info --- src/api/providers/deepseek.ts | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index a9d3f236bcf..8f1f517a14b 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -18,14 +18,7 @@ export class DeepSeekHandler extends OpenAiHandler { override getModel(): { id: string; info: ModelInfo } { const modelId = this.options.apiModelId ?? 
deepSeekDefaultModelId - const originalInfo = - deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId] - - // Apply defaults with proper typing - const info: ModelInfo = { - ...originalInfo, - supportsPromptCache: true, // DeepSeek *does* support prompt caching - } + const info = deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId] return { id: modelId, From 21101e25bfee2256b06d049812fde6c89a4fd875 Mon Sep 17 00:00:00 2001 From: Olwer Altuve Date: Thu, 6 Mar 2025 12:29:13 -0400 Subject: [PATCH 4/4] refactor(deepseek): update test expectations - Update test case to reflect current object reference behavior for model info --- src/api/providers/__tests__/deepseek.test.ts | 6 +++--- src/api/providers/deepseek.ts | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/api/providers/__tests__/deepseek.test.ts b/src/api/providers/__tests__/deepseek.test.ts index fccad000a74..eb00bf6d65d 100644 --- a/src/api/providers/__tests__/deepseek.test.ts +++ b/src/api/providers/__tests__/deepseek.test.ts @@ -168,9 +168,9 @@ describe("DeepSeekHandler", () => { const model = handlerWithInvalidModel.getModel() expect(model.id).toBe("invalid-model") // Returns provided ID expect(model.info).toBeDefined() - // Should not be the same object reference anymore due to the spread and override - expect(model.info).not.toBe(handler.getModel().info) - // But should have the same base properties + // With the current implementation, it's the same object reference when using default model info + expect(model.info).toBe(handler.getModel().info) + // Should have the same base properties expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow) // And should have supportsPromptCache set to true expect(model.info.supportsPromptCache).toBe(true) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 8f1f517a14b..2c12637d947 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -2,7 +2,6 @@ import { OpenAiHandler, OpenAiHandlerOptions } from "./openai" import { deepSeekModels, deepSeekDefaultModelId, ModelInfo } from "../../shared/api" import { ApiStreamUsageChunk } from "../transform/stream" // Import for type import { getModelParams } from "../index" -import OpenAI from "openai" export class DeepSeekHandler extends OpenAiHandler { constructor(options: OpenAiHandlerOptions) {
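An illustrative sketch (not taken from the patches above): the cacheWritesPrice/cacheReadsPrice fields added to deepSeekModels, together with the cacheWriteTokens/cacheReadTokens emitted by processUsageMetrics, are enough to estimate per-request cost. The UsageChunk and CachePricing shapes and the estimateDeepSeekCost helper below are assumptions made for this example only — they mirror, but do not reference, the project's actual types.

// Minimal cost-estimation sketch. The types and helper are hypothetical; they
// only mirror the fields introduced in the patches above.
interface UsageChunk {
	type: "usage"
	inputTokens: number // prompt_tokens (cache hits + misses)
	outputTokens: number // completion_tokens
	cacheWriteTokens?: number // prompt_tokens_details.cache_miss_tokens
	cacheReadTokens?: number // prompt_tokens_details.cached_tokens
}

interface CachePricing {
	outputPrice: number // USD per million output tokens
	cacheWritesPrice: number // USD per million cache-miss input tokens
	cacheReadsPrice: number // USD per million cache-hit input tokens
}

// deepseek-chat pricing as set in src/shared/api.ts by these patches.
const deepSeekChatPricing: CachePricing = {
	outputPrice: 1.1,
	cacheWritesPrice: 0.27,
	cacheReadsPrice: 0.07,
}

function estimateDeepSeekCost(usage: UsageChunk, pricing: CachePricing): number {
	// prompt_tokens already includes cached tokens, so input cost is split across
	// the two cache buckets rather than charged again at a single flat input rate.
	const missTokens = usage.cacheWriteTokens ?? usage.inputTokens
	const hitTokens = usage.cacheReadTokens ?? 0
	return (
		(missTokens / 1_000_000) * pricing.cacheWritesPrice +
		(hitTokens / 1_000_000) * pricing.cacheReadsPrice +
		(usage.outputTokens / 1_000_000) * pricing.outputPrice
	)
}

// Usage payload from the mocked stream in the tests: 8 cache-miss + 2 cached
// prompt tokens and 5 completion tokens.
const example: UsageChunk = {
	type: "usage",
	inputTokens: 10,
	outputTokens: 5,
	cacheWriteTokens: 8,
	cacheReadTokens: 2,
}

console.log(estimateDeepSeekCost(example, deepSeekChatPricing)) // ≈ $0.0000078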