Commit d9e0310

Merge pull request #1427 from olweraltuve/fix-deepseek-cache-official-api
Fix deepseek cache official api
2 parents b11230c + 21101e2 commit d9e0310

3 files changed: +119 −11 lines changed

src/api/providers/__tests__/deepseek.test.ts

Lines changed: 91 additions & 2 deletions
@@ -26,6 +26,10 @@ jest.mock("openai", () => {
 				prompt_tokens: 10,
 				completion_tokens: 5,
 				total_tokens: 15,
+				prompt_tokens_details: {
+					cache_miss_tokens: 8,
+					cached_tokens: 2,
+				},
 			},
 		}
 	}
@@ -53,6 +57,10 @@ jest.mock("openai", () => {
 				prompt_tokens: 10,
 				completion_tokens: 5,
 				total_tokens: 15,
+				prompt_tokens_details: {
+					cache_miss_tokens: 8,
+					cached_tokens: 2,
+				},
 			},
 		}
 	},
@@ -149,7 +157,7 @@ describe("DeepSeekHandler", () => {
 			expect(model.info.maxTokens).toBe(8192)
 			expect(model.info.contextWindow).toBe(64_000)
 			expect(model.info.supportsImages).toBe(false)
-			expect(model.info.supportsPromptCache).toBe(false)
+			expect(model.info.supportsPromptCache).toBe(true) // Should be true now
 		})

 		it("should return provided model ID with default model info if model does not exist", () => {
@@ -160,7 +168,12 @@ describe("DeepSeekHandler", () => {
 			const model = handlerWithInvalidModel.getModel()
 			expect(model.id).toBe("invalid-model") // Returns provided ID
 			expect(model.info).toBeDefined()
-			expect(model.info).toBe(handler.getModel().info) // But uses default model info
+			// With the current implementation, it's the same object reference when using default model info
+			expect(model.info).toBe(handler.getModel().info)
+			// Should have the same base properties
+			expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow)
+			// And should have supportsPromptCache set to true
+			expect(model.info.supportsPromptCache).toBe(true)
 		})

 		it("should return default model if no model ID is provided", () => {
@@ -171,6 +184,13 @@ describe("DeepSeekHandler", () => {
 			const model = handlerWithoutModel.getModel()
 			expect(model.id).toBe(deepSeekDefaultModelId)
 			expect(model.info).toBeDefined()
+			expect(model.info.supportsPromptCache).toBe(true)
+		})
+
+		it("should include model parameters from getModelParams", () => {
+			const model = handler.getModel()
+			expect(model).toHaveProperty("temperature")
+			expect(model).toHaveProperty("maxTokens")
 		})
 	})

@@ -213,5 +233,74 @@ describe("DeepSeekHandler", () => {
 			expect(usageChunks[0].inputTokens).toBe(10)
 			expect(usageChunks[0].outputTokens).toBe(5)
 		})
+
+		it("should include cache metrics in usage information", async () => {
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const usageChunks = chunks.filter((chunk) => chunk.type === "usage")
+			expect(usageChunks.length).toBeGreaterThan(0)
+			expect(usageChunks[0].cacheWriteTokens).toBe(8)
+			expect(usageChunks[0].cacheReadTokens).toBe(2)
+		})
+	})
+
+	describe("processUsageMetrics", () => {
+		it("should correctly process usage metrics including cache information", () => {
+			// We need to access the protected method, so we'll create a test subclass
+			class TestDeepSeekHandler extends DeepSeekHandler {
+				public testProcessUsageMetrics(usage: any) {
+					return this.processUsageMetrics(usage)
+				}
+			}
+
+			const testHandler = new TestDeepSeekHandler(mockOptions)
+
+			const usage = {
+				prompt_tokens: 100,
+				completion_tokens: 50,
+				total_tokens: 150,
+				prompt_tokens_details: {
+					cache_miss_tokens: 80,
+					cached_tokens: 20,
+				},
+			}

+			const result = testHandler.testProcessUsageMetrics(usage)
+
+			expect(result.type).toBe("usage")
+			expect(result.inputTokens).toBe(100)
+			expect(result.outputTokens).toBe(50)
+			expect(result.cacheWriteTokens).toBe(80)
+			expect(result.cacheReadTokens).toBe(20)
+		})
+
+		it("should handle missing cache metrics gracefully", () => {
+			class TestDeepSeekHandler extends DeepSeekHandler {
+				public testProcessUsageMetrics(usage: any) {
+					return this.processUsageMetrics(usage)
+				}
+			}
+
+			const testHandler = new TestDeepSeekHandler(mockOptions)
+
+			const usage = {
+				prompt_tokens: 100,
+				completion_tokens: 50,
+				total_tokens: 150,
+				// No prompt_tokens_details
+			}
+
+			const result = testHandler.testProcessUsageMetrics(usage)
+
+			expect(result.type).toBe("usage")
+			expect(result.inputTokens).toBe(100)
+			expect(result.outputTokens).toBe(50)
+			expect(result.cacheWriteTokens).toBeUndefined()
+			expect(result.cacheReadTokens).toBeUndefined()
+		})
 	})
 })
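For reference, a minimal sketch of the usage payload these tests mock. The field names come straight from the mock and fixtures above; the interface itself is illustrative, not part of the PR:

interface DeepSeekUsage {
	prompt_tokens: number // total prompt tokens (cache hits + cache misses)
	completion_tokens: number
	total_tokens: number
	prompt_tokens_details?: {
		cache_miss_tokens: number // billed at the cache-miss rate
		cached_tokens: number // served from the prompt cache at the cheaper hit rate
	}
}

Note that prompt_tokens already includes both detail counts (8 + 2 = 10 in the mock, 80 + 20 = 100 in the fixtures), so prompt_tokens_details is a breakdown of the prompt, not an addition to it.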

src/api/providers/deepseek.ts

Lines changed: 18 additions & 3 deletions
@@ -1,6 +1,7 @@
 import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
-import { ModelInfo } from "../../shared/api"
-import { deepSeekModels, deepSeekDefaultModelId } from "../../shared/api"
+import { deepSeekModels, deepSeekDefaultModelId, ModelInfo } from "../../shared/api"
+import { ApiStreamUsageChunk } from "../transform/stream" // Import for type
+import { getModelParams } from "../index"

 export class DeepSeekHandler extends OpenAiHandler {
 	constructor(options: OpenAiHandlerOptions) {
@@ -16,9 +17,23 @@ export class DeepSeekHandler extends OpenAiHandler {

 	override getModel(): { id: string; info: ModelInfo } {
 		const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
+		const info = deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
+
 		return {
 			id: modelId,
-			info: deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId],
+			info,
+			...getModelParams({ options: this.options, model: info }),
+		}
+	}
+
+	// Override to handle DeepSeek's usage metrics, including caching.
+	protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
+		return {
+			type: "usage",
+			inputTokens: usage?.prompt_tokens || 0,
+			outputTokens: usage?.completion_tokens || 0,
+			cacheWriteTokens: usage?.prompt_tokens_details?.cache_miss_tokens,
+			cacheReadTokens: usage?.prompt_tokens_details?.cached_tokens,
 		}
 	}
 }
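Standalone, the new processUsageMetrics override behaves like the sketch below — an assumed pure-function restatement for illustration only; the real method returns an ApiStreamUsageChunk, whose full definition lives in ../transform/stream:

// Assumed stand-alone restatement of the override above (illustrative).
function toUsageChunk(usage: any) {
	return {
		type: "usage" as const,
		inputTokens: usage?.prompt_tokens || 0,
		outputTokens: usage?.completion_tokens || 0,
		// Optional chaining leaves these undefined when the API omits
		// prompt_tokens_details, which is exactly what the new
		// "missing cache metrics" test asserts.
		cacheWriteTokens: usage?.prompt_tokens_details?.cache_miss_tokens,
		cacheReadTokens: usage?.prompt_tokens_details?.cached_tokens,
	}
}

// Mirroring the test fixtures:
// toUsageChunk({ prompt_tokens: 100, completion_tokens: 50, total_tokens: 150,
//   prompt_tokens_details: { cache_miss_tokens: 80, cached_tokens: 20 } })
// → { type: "usage", inputTokens: 100, outputTokens: 50, cacheWriteTokens: 80, cacheReadTokens: 20 }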

src/shared/api.ts

Lines changed: 10 additions & 6 deletions
@@ -814,19 +814,23 @@ export const deepSeekModels = {
 		maxTokens: 8192,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 0.014, // $0.014 per million tokens
-		outputPrice: 0.28, // $0.28 per million tokens
+		supportsPromptCache: true,
+		inputPrice: 0.27, // $0.27 per million tokens (cache miss)
+		outputPrice: 1.1, // $1.10 per million tokens
+		cacheWritesPrice: 0.27, // $0.27 per million tokens (cache miss)
+		cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit)
 		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
 	},
 	"deepseek-reasoner": {
 		maxTokens: 8192,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 0.55, // $0.55 per million tokens
+		supportsPromptCache: true,
+		inputPrice: 0.55, // $0.55 per million tokens (cache miss)
 		outputPrice: 2.19, // $2.19 per million tokens
-		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.`,
+		cacheWritesPrice: 0.55, // $0.55 per million tokens (cache miss)
+		cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit)
+		description: `DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 32K tokens.`,
 	},
 } as const satisfies Record<string, ModelInfo>
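To see what the new fields imply in dollars, here is a hypothetical cost helper. It assumes, as the field names suggest, that cache-miss tokens bill at cacheWritesPrice and cache hits at cacheReadsPrice; the actual cost calculation is outside this diff:

// Hypothetical estimate under the new deepseek-chat pricing (not part of the PR).
function estimateDeepSeekChatCostUSD(cacheWriteTokens: number, cacheReadTokens: number, outputTokens: number): number {
	const cacheWritesPrice = 0.27 // $ per million tokens (cache miss)
	const cacheReadsPrice = 0.07 // $ per million tokens (cache hit)
	const outputPrice = 1.1 // $ per million tokens
	return (cacheWriteTokens * cacheWritesPrice + cacheReadTokens * cacheReadsPrice + outputTokens * outputPrice) / 1_000_000
}

// With the processUsageMetrics fixtures (80 misses, 20 hits, 50 output tokens):
// (80 * 0.27 + 20 * 0.07 + 50 * 1.1) / 1e6 ≈ $0.000078
console.log(estimateDeepSeekChatCostUSD(80, 20, 50))

Cache hits bill at roughly a quarter of the miss rate ($0.07 vs $0.27 per million for deepseek-chat, $0.14 vs $0.55 for deepseek-reasoner), which is why exposing the hit/miss split matters for cost reporting.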
