Commit 1711d6c
feat(deepseek): enhance model support with prompt caching and detailed usage metrics
- Add support for prompt caching in DeepSeek models
- Update model configurations to reflect caching capabilities
- Implement detailed usage metrics tracking cache write and read tokens
- Extend test coverage for new usage metrics and model parameters
- Update pricing information for DeepSeek models to include cache-related costs
Parent: 7cdecdd

File tree

3 files changed: +128 -11 lines changed

- src/api/providers/__tests__/deepseek.test.ts
- src/api/providers/deepseek.ts
- src/shared/api.ts

src/api/providers/__tests__/deepseek.test.ts

Lines changed: 91 additions & 2 deletions
@@ -26,6 +26,10 @@ jest.mock("openai", () => {
 					prompt_tokens: 10,
 					completion_tokens: 5,
 					total_tokens: 15,
+					prompt_tokens_details: {
+						cache_miss_tokens: 8,
+						cached_tokens: 2,
+					},
 				},
 			}
 		}
@@ -53,6 +57,10 @@ jest.mock("openai", () => {
 					prompt_tokens: 10,
 					completion_tokens: 5,
 					total_tokens: 15,
+					prompt_tokens_details: {
+						cache_miss_tokens: 8,
+						cached_tokens: 2,
+					},
 				},
 			}
 		},
@@ -149,7 +157,7 @@ describe("DeepSeekHandler", () => {
 			expect(model.info.maxTokens).toBe(8192)
 			expect(model.info.contextWindow).toBe(64_000)
 			expect(model.info.supportsImages).toBe(false)
-			expect(model.info.supportsPromptCache).toBe(false)
+			expect(model.info.supportsPromptCache).toBe(true) // Should be true now
 		})

 		it("should return provided model ID with default model info if model does not exist", () => {
@@ -160,7 +168,12 @@
 			const model = handlerWithInvalidModel.getModel()
 			expect(model.id).toBe("invalid-model") // Returns provided ID
 			expect(model.info).toBeDefined()
-			expect(model.info).toBe(handler.getModel().info) // But uses default model info
+			// Should not be the same object reference anymore due to the spread and override
+			expect(model.info).not.toBe(handler.getModel().info)
+			// But should have the same base properties
+			expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow)
+			// And should have supportsPromptCache set to true
+			expect(model.info.supportsPromptCache).toBe(true)
 		})

 		it("should return default model if no model ID is provided", () => {
@@ -171,6 +184,13 @@
 			const model = handlerWithoutModel.getModel()
 			expect(model.id).toBe(deepSeekDefaultModelId)
 			expect(model.info).toBeDefined()
+			expect(model.info.supportsPromptCache).toBe(true)
+		})
+
+		it("should include model parameters from getModelParams", () => {
+			const model = handler.getModel()
+			expect(model).toHaveProperty("temperature")
+			expect(model).toHaveProperty("maxTokens")
 		})
 	})

@@ -213,5 +233,74 @@ describe("DeepSeekHandler", () => {
 			expect(usageChunks[0].inputTokens).toBe(10)
 			expect(usageChunks[0].outputTokens).toBe(5)
 		})
+
+		it("should include cache metrics in usage information", async () => {
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+			expect(usageChunks.length).toBeGreaterThan(0)
+			expect(usageChunks[0].cacheWriteTokens).toBe(8)
+			expect(usageChunks[0].cacheReadTokens).toBe(2)
+		})
+	})
+
+	describe("processUsageMetrics", () => {
+		it("should correctly process usage metrics including cache information", () => {
+			// We need to access the protected method, so we'll create a test subclass
+			class TestDeepSeekHandler extends DeepSeekHandler {
+				public testProcessUsageMetrics(usage: any) {
+					return this.processUsageMetrics(usage)
+				}
+			}
+
+			const testHandler = new TestDeepSeekHandler(mockOptions)
+
+			const usage = {
+				prompt_tokens: 100,
+				completion_tokens: 50,
+				total_tokens: 150,
+				prompt_tokens_details: {
+					cache_miss_tokens: 80,
+					cached_tokens: 20,
+				},
+			}

+			const result = testHandler.testProcessUsageMetrics(usage)
+
+			expect(result.type).toBe("usage")
+			expect(result.inputTokens).toBe(100)
+			expect(result.outputTokens).toBe(50)
+			expect(result.cacheWriteTokens).toBe(80)
+			expect(result.cacheReadTokens).toBe(20)
+		})
+
+		it("should handle missing cache metrics gracefully", () => {
+			class TestDeepSeekHandler extends DeepSeekHandler {
+				public testProcessUsageMetrics(usage: any) {
+					return this.processUsageMetrics(usage)
+				}
+			}
+
+			const testHandler = new TestDeepSeekHandler(mockOptions)
+
+			const usage = {
+				prompt_tokens: 100,
+				completion_tokens: 50,
+				total_tokens: 150,
+				// No prompt_tokens_details
+			}
+
+			const result = testHandler.testProcessUsageMetrics(usage)
+
+			expect(result.type).toBe("usage")
+			expect(result.inputTokens).toBe(100)
+			expect(result.outputTokens).toBe(50)
+			expect(result.cacheWriteTokens).toBeUndefined()
+			expect(result.cacheReadTokens).toBeUndefined()
+		})
 	})
 })

src/api/providers/deepseek.ts

Lines changed: 26 additions & 3 deletions
@@ -1,6 +1,8 @@
 import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
-import { ModelInfo } from "../../shared/api"
-import { deepSeekModels, deepSeekDefaultModelId } from "../../shared/api"
+import { deepSeekModels, deepSeekDefaultModelId, ModelInfo } from "../../shared/api"
+import { ApiStreamUsageChunk } from "../transform/stream" // Import for type
+import { getModelParams } from "../index"
+import OpenAI from "openai"

 export class DeepSeekHandler extends OpenAiHandler {
 	constructor(options: OpenAiHandlerOptions) {
@@ -16,9 +18,30 @@ export class DeepSeekHandler extends OpenAiHandler {

 	override getModel(): { id: string; info: ModelInfo } {
 		const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
+		const originalInfo =
+			deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
+
+		// Apply defaults with proper typing
+		const info: ModelInfo = {
+			...originalInfo,
+			supportsPromptCache: true, // DeepSeek *does* support prompt caching
+		}
+
 		return {
 			id: modelId,
-			info: deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId],
+			info,
+			...getModelParams({ options: this.options, model: info }),
+		}
+	}
+
+	// Override to handle DeepSeek's usage metrics, including caching.
+	protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
+		return {
+			type: "usage",
+			inputTokens: usage?.prompt_tokens || 0,
+			outputTokens: usage?.completion_tokens || 0,
+			cacheWriteTokens: usage?.prompt_tokens_details?.cache_miss_tokens,
+			cacheReadTokens: usage?.prompt_tokens_details?.cached_tokens,
 		}
 	}
 }
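
For illustration, the new usage mapping can be exercised on its own. The sketch below is a simplified stand-in, not the committed class (which inherits everything else from OpenAiHandler): it inlines a reduced ApiStreamUsageChunk type and feeds the function the same payload the mocked OpenAI client returns in the tests above.

// Minimal standalone sketch of the cache-aware usage mapping.
// DeepSeek reports prompt-cache activity under prompt_tokens_details:
// cache_miss_tokens (tokens written to the cache) map to cacheWriteTokens,
// cached_tokens (tokens served from the cache) map to cacheReadTokens.
type ApiStreamUsageChunk = {
	type: "usage"
	inputTokens: number
	outputTokens: number
	cacheWriteTokens?: number
	cacheReadTokens?: number
}

function processUsageMetrics(usage: any): ApiStreamUsageChunk {
	return {
		type: "usage",
		inputTokens: usage?.prompt_tokens || 0,
		outputTokens: usage?.completion_tokens || 0,
		cacheWriteTokens: usage?.prompt_tokens_details?.cache_miss_tokens,
		cacheReadTokens: usage?.prompt_tokens_details?.cached_tokens,
	}
}

console.log(
	processUsageMetrics({
		prompt_tokens: 10,
		completion_tokens: 5,
		total_tokens: 15,
		prompt_tokens_details: { cache_miss_tokens: 8, cached_tokens: 2 },
	}),
)
// → { type: "usage", inputTokens: 10, outputTokens: 5, cacheWriteTokens: 8, cacheReadTokens: 2 }

If prompt_tokens_details is absent, both optional fields come back undefined rather than 0, which is exactly what the "should handle missing cache metrics gracefully" test asserts.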

src/shared/api.ts

Lines changed: 11 additions & 6 deletions
@@ -814,19 +814,24 @@ export const deepSeekModels = {
 		maxTokens: 8192,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 0.014, // $0.014 per million tokens
-		outputPrice: 0.28, // $0.28 per million tokens
+		supportsPromptCache: true,
+		inputPrice: 0.27, // $0.27 per million tokens (cache miss)
+		outputPrice: 1.1, // $1.10 per million tokens
+		cacheWritesPrice: 0.27, // $0.27 per million tokens (cache miss)
+		cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit)
 		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
 	},
 	"deepseek-reasoner": {
 		maxTokens: 8192,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 0.55, // $0.55 per million tokens
+		supportsPromptCache: true,
+		inputPrice: 0.55, // $0.55 per million tokens (cache miss)
 		outputPrice: 2.19, // $2.19 per million tokens
-		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`,
+		cacheWritesPrice: 0.55, // $0.55 per million tokens (cache miss)
+		cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit)
+		thinking: true, // Supports Chain of Thought with 32K tokens
+		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 32K tokens.`,
 	},
 } as const satisfies Record<string, ModelInfo>
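
As a sanity check on the new pricing fields, here is a hypothetical cost helper (illustration only, not part of this commit) that combines the cache metrics emitted by processUsageMetrics with the per-million-token prices defined above:

// Hypothetical helper: estimate request cost in USD from cache-aware usage.
// Field names mirror the ModelInfo pricing fields added in this commit.
interface CachePricing {
	cacheWritesPrice: number // $ per million cache-miss (write) input tokens
	cacheReadsPrice: number // $ per million cache-hit (read) input tokens
	outputPrice: number // $ per million output tokens
}

function estimateCostUsd(
	pricing: CachePricing,
	cacheWriteTokens: number,
	cacheReadTokens: number,
	outputTokens: number,
): number {
	return (
		(cacheWriteTokens * pricing.cacheWritesPrice +
			cacheReadTokens * pricing.cacheReadsPrice +
			outputTokens * pricing.outputPrice) /
		1_000_000
	)
}

// deepseek-chat example: 80 cache-miss + 20 cached prompt tokens, 50 output tokens
// (80 × 0.27 + 20 × 0.07 + 50 × 1.1) / 1_000_000 = $0.000078
console.log(estimateCostUsd({ cacheWritesPrice: 0.27, cacheReadsPrice: 0.07, outputPrice: 1.1 }, 80, 20, 50))

Note that for both models a cache hit is billed at roughly a quarter of the cache-miss input rate ($0.07 vs. $0.27 for deepseek-chat, $0.14 vs. $0.55 for deepseek-reasoner), so repeated system prompts and conversation prefixes become markedly cheaper.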
