Skip to content

Commit 866c5b9

Browse files
Andrei Ungureanu
authored and committed
Prevent Codex duplicate completions
1 parent 87b45de commit 866c5b9

File tree

2 files changed

+542
-0
lines changed

2 files changed

+542
-0
lines changed
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"
2+
import type { Anthropic } from "@anthropic-ai/sdk"
3+
4+
import type { ApiStreamChunk } from "../../transform/stream"
5+
import type { ApiHandlerCreateMessageMetadata } from "../../index"
6+
import type { ApiHandlerOptions } from "../../../shared/api"
7+
import { OpenAiNativeHandler } from "../openai-native"
8+
9+
const createSessionMock = vi.hoisted(() => vi.fn())
10+
11+
vi.mock("openai", () => {
12+
return {
13+
__esModule: true,
14+
default: vi.fn().mockImplementation(() => ({
15+
responses: {
16+
create: vi.fn(),
17+
},
18+
})),
19+
}
20+
})
21+
22+
vi.mock("../../../integrations/codex/run", () => ({
23+
CodexCliSession: {
24+
create: createSessionMock,
25+
},
26+
}))
27+
28+
import { CodexHandler } from "../codex"
29+
30+
const systemPrompt = "You are Codex."
31+
32+
const defaultOptions: ApiHandlerOptions = {
33+
apiModelId: "gpt-5-codex",
34+
openAiNativeApiKey: "test-api-key",
35+
}
36+
37+
const metadata: ApiHandlerCreateMessageMetadata = {
38+
taskId: "task-123",
39+
}
40+
41+
const makeSession = (chunks: ApiStreamChunk[]) => ({
42+
runTurn: vi.fn().mockResolvedValue(asyncGeneratorFromChunks(chunks)),
43+
shutdown: vi.fn().mockResolvedValue(undefined),
44+
})
45+
46+
function asyncGeneratorFromChunks(chunks: ApiStreamChunk[]): AsyncGenerator<ApiStreamChunk> {
47+
return (async function* () {
48+
for (const chunk of chunks) {
49+
yield chunk
50+
}
51+
})()
52+
}
53+
54+
// Suite for CodexHandler's duplicate-completion guard: when a Codex session
// re-emits text that matches the prior assistant message, the handler must
// suppress or retry rather than stream the duplicate to the caller.
describe("CodexHandler", () => {
	let handler: CodexHandler
	// Spy on the parent class's createMessage so we can assert the handler did
	// NOT fall back to the OpenAI-native path; the stub yields a sentinel chunk.
	let fallbackSpy: ReturnType<typeof vi.spyOn>

	beforeEach(() => {
		// Reset the session factory so each test scripts its own sessions.
		createSessionMock.mockReset()
		fallbackSpy = vi.spyOn(OpenAiNativeHandler.prototype, "createMessage").mockImplementation(async function* () {
			yield { type: "text", text: "[fallback]" }
		})
		handler = new CodexHandler(defaultOptions)
	})

	afterEach(() => {
		fallbackSpy.mockRestore()
	})

	it("omits duplicate prefix chunks before streaming new Codex output", async () => {
		// Conversation ends with an assistant message whose text Codex is about
		// to repeat verbatim as the first chunk of the new turn.
		const messages: Anthropic.Messages.MessageParam[] = [
			{ role: "user", content: "Initial request" },
			{ role: "assistant", content: "Task completed successfully." },
			{ role: "user", content: "Please continue" },
		]

		// Session streams the duplicated prefix, then genuinely new text.
		createSessionMock.mockResolvedValueOnce(
			makeSession([
				{ type: "text", text: "Task completed successfully." },
				{ type: "text", text: "Here are the next steps." },
				{ type: "usage", inputTokens: 10, outputTokens: 5 },
			]),
		)

		const stream = handler.createMessage(systemPrompt, messages, metadata)
		const chunks: ApiStreamChunk[] = []
		for await (const chunk of stream) {
			chunks.push(chunk)
		}

		// Only the new text survives; the duplicated prefix chunk is dropped.
		const textChunks = chunks.filter((chunk) => chunk.type === "text")
		expect(textChunks).toHaveLength(1)
		expect(textChunks[0].text).toBe("Here are the next steps.")
		// Usage accounting still flows through even when text was filtered.
		const usageChunk = chunks.find((chunk) => chunk.type === "usage")
		expect(usageChunk).toBeTruthy()
		expect(fallbackSpy).not.toHaveBeenCalled()
	})

	it("replays the turn without the prior assistant message when Codex repeats itself", async () => {
		const messages: Anthropic.Messages.MessageParam[] = [
			{ role: "user", content: "Initial request" },
			{ role: "assistant", content: "Task completed successfully." },
			{ role: "user", content: "Please continue" },
		]

		// First session produces ONLY the duplicate (no new text), which should
		// trigger a retry; the second session yields the real continuation.
		createSessionMock
			.mockResolvedValueOnce(
				makeSession([
					{ type: "text", text: "Task completed successfully." },
					{ type: "usage", inputTokens: 10, outputTokens: 2 },
				]),
			)
			.mockResolvedValueOnce(
				makeSession([
					{ type: "text", text: "Continuing with the follow-up work." },
					{ type: "usage", inputTokens: 12, outputTokens: 6 },
				]),
			)

		const stream = handler.createMessage(systemPrompt, messages, metadata)
		const chunks: ApiStreamChunk[] = []
		for await (const chunk of stream) {
			chunks.push(chunk)
		}

		// Caller sees only the retried turn's text, never the duplicate.
		const textChunks = chunks.filter((chunk) => chunk.type === "text")
		expect(textChunks).toHaveLength(1)
		expect(textChunks[0].text).toBe("Continuing with the follow-up work.")
		// Two sessions were created: the duplicate turn plus the replay.
		expect(createSessionMock).toHaveBeenCalledTimes(2)
		expect(fallbackSpy).not.toHaveBeenCalled()
	})
})

0 commit comments

Comments (0)