Skip to content

Commit 194cbd4

Browse files
committed
fix(translate): harden data-url placeholder integrity and retry
1 parent 2008d94 commit 194cbd4

File tree

3 files changed: 315 additions and 41 deletions.

scripts/notion-translate/test-openai-mock.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,17 @@ vi.mock("openai", () => ({
3030
}));
3131

// resetOpenAIMock: puts the shared `mockOpenAIChatCompletionCreate` mock back into
// a known default state. This is a diff rendering: the line after a "NN-" marker is
// removed by the commit, and lines after "NN+" markers are added by it.
//
// After this commit the helper (1) calls mockReset() instead of mockClear(), and
// (2) installs a default resolved value: one choice whose message content is a JSON
// string holding a `markdown` and a `title` field.
// In Vitest, mockClear() only clears recorded calls/results, while mockReset() also
// drops any implementation or queued return values configured on the mock.
3232
export const resetOpenAIMock = () => {
// Removed by the commit: previous reset that only cleared call history.
33-
mockOpenAIChatCompletionCreate.mockClear();
// Added by the commit: full reset, followed by a default successful response.
33+
mockOpenAIChatCompletionCreate.mockReset();
34+
mockOpenAIChatCompletionCreate.mockResolvedValue({
35+
choices: [
36+
{
37+
message: {
38+
content: JSON.stringify({
39+
markdown: "# translated\n\nMock content",
40+
title: "Mock Title",
41+
}),
42+
},
43+
},
44+
],
45+
});
3446
};

scripts/notion-translate/translateFrontMatter.test.ts

Lines changed: 120 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ describe("notion-translate translateFrontMatter", () => {
5858
it("classifies token overflow errors as non-critical token_overflow code", async () => {
5959
const { translateText } = await import("./translateFrontMatter");
6060

61-
mockOpenAIChatCompletionCreate.mockRejectedValueOnce({
61+
mockOpenAIChatCompletionCreate.mockRejectedValue({
6262
status: 400,
6363
message:
6464
"Input tokens exceed the configured limit of 272000 tokens. Your messages resulted in 486881 tokens.",
@@ -72,6 +72,23 @@ describe("notion-translate translateFrontMatter", () => {
7272
);
7373
});
7474

// Test (added by this commit): a rejection with status 400 and a
// "maximum context length ... you requested N tokens" message must make
// translateText reject with an object containing code "token_overflow" and
// isCritical false.
// mockRejectedValue (not the ...Once variant) makes every call to the mock reject.
// NOTE(review): presumably needed because translateText may call the API more than
// once on overflow — confirm against translateFrontMatter.
75+
it("classifies DeepSeek maximum-context errors as token_overflow", async () => {
76+
const { translateText } = await import("./translateFrontMatter");
77+
78+
mockOpenAIChatCompletionCreate.mockRejectedValue({
79+
status: 400,
80+
message:
81+
"This model's maximum context length is 131072 tokens. However, you requested 211994 tokens (211994 in the messages, 0 in the completion).",
82+
});
83+
// objectContaining: only `code` and `isCritical` are pinned; other fields on the
// rejection value are not checked.
84+
await expect(translateText("# Body", "Title", "pt-BR")).rejects.toEqual(
85+
expect.objectContaining({
86+
code: "token_overflow",
87+
isCritical: false,
88+
})
89+
);
90+
});
91+
7592
it("takes the single-call fast path for small content", async () => {
7693
const { translateText } = await import("./translateFrontMatter");
7794

@@ -111,6 +128,108 @@ describe("notion-translate translateFrontMatter", () => {
111128
expect(result.markdown.length).toBeGreaterThan(0);
112129
});
113130

// Test (added by this commit): the first API call rejects with a status-400
// maximum-context-length error; every later call resolves with a translated
// chunk. translateText must then succeed instead of surfacing the overflow.
// The assertions only require more than one API call, the title from the mocked
// response, and non-empty markdown; the splitting strategy named in the test title
// is not inspected here.
131+
it("retries the fast path with adaptive splitting on token overflow", async () => {
132+
const { translateText } = await import("./translateFrontMatter");
133+
// One-shot rejection followed by a persistent success response.
134+
mockOpenAIChatCompletionCreate
135+
.mockRejectedValueOnce({
136+
status: 400,
137+
message:
138+
"This model's maximum context length is 131072 tokens. However, you requested 211603 tokens (211603 in the messages, 0 in the completion).",
139+
})
140+
.mockResolvedValue({
141+
choices: [
142+
{
143+
message: {
144+
content: JSON.stringify({
145+
markdown: "translated chunk",
146+
title: "Translated Title",
147+
}),
148+
},
149+
},
150+
],
151+
});
152+
// Deliberately small input: the overflow comes from the mock, not the content size.
153+
const result = await translateText(
154+
"# Small page\n\nJust a paragraph.",
155+
"Small",
156+
"pt-BR"
157+
);
158+
// More than one call shows the failed first attempt was followed by a retry.
159+
expect(mockOpenAIChatCompletionCreate.mock.calls.length).toBeGreaterThan(1);
160+
expect(result.title).toBe("Translated Title");
161+
expect(result.markdown.length).toBeGreaterThan(0);
162+
});
163+
// Test (added by this commit): an inline `data:image/png;base64,...` image URL in
// the source markdown must not be sent to the model. The prompt should carry the
// placeholder path instead, and the returned markdown must contain the original
// data URL again.
// The data URL payload is 6000 "A" characters.
164+
it("masks and restores data URL images during translation", async () => {
165+
const { translateText } = await import("./translateFrontMatter");
166+
const dataUrl = `data:image/png;base64,${"A".repeat(6000)}`;
167+
const placeholderPath = "/images/__data_url_placeholder_0__.png";
168+
// The mocked model response keeps the placeholder path intact.
169+
mockOpenAIChatCompletionCreate.mockResolvedValue({
170+
choices: [
171+
{
172+
message: {
173+
content: JSON.stringify({
174+
markdown: `![image](${placeholderPath})\n\nTranslated`,
175+
title: "Translated Title",
176+
}),
177+
},
178+
},
179+
],
180+
});
181+
182+
const source = `![image](${dataUrl})\n\nBody text`;
183+
const result = await translateText(source, "Title", "pt-BR");
184+
// Inspect the first request sent to the mock.
// NOTE(review): treats messages[1] as the user prompt — confirm the message order
// built by translateText.
185+
const firstCallArgs = mockOpenAIChatCompletionCreate.mock.calls[0][0] as {
186+
messages?: Array<{ role: string; content: string }>;
187+
};
188+
const userPrompt = firstCallArgs.messages?.[1]?.content ?? "";
189+
// Outbound prompt is masked; the final result has the data URL restored.
190+
expect(userPrompt).not.toContain(dataUrl);
191+
expect(userPrompt).toContain(placeholderPath);
192+
expect(result.markdown).toContain(dataUrl);
193+
});
194+
// Test (added by this commit): the first mocked response swaps the placeholder
// image path for "/images/changed-path.png"; the second response returns the
// placeholder unchanged. translateText must call the API exactly twice and return
// markdown containing the original data URL.
// The data URL payload is 6000 "B" characters.
195+
it("retries when placeholder integrity check fails", async () => {
196+
const { translateText } = await import("./translateFrontMatter");
197+
const dataUrl = `data:image/png;base64,${"B".repeat(6000)}`;
198+
const placeholderPath = "/images/__data_url_placeholder_0__.png";
199+
// Two one-shot responses: a corrupted placeholder first, then an intact one.
200+
mockOpenAIChatCompletionCreate
201+
.mockResolvedValueOnce({
202+
choices: [
203+
{
204+
message: {
205+
content: JSON.stringify({
206+
markdown: "![image](/images/changed-path.png)\n\nTranslated",
207+
title: "Translated Title",
208+
}),
209+
},
210+
},
211+
],
212+
})
213+
.mockResolvedValueOnce({
214+
choices: [
215+
{
216+
message: {
217+
content: JSON.stringify({
218+
markdown: `![image](${placeholderPath})\n\nTranslated`,
219+
title: "Translated Title",
220+
}),
221+
},
222+
},
223+
],
224+
});
225+
226+
const source = `![image](${dataUrl})\n\nBody text`;
227+
const result = await translateText(source, "Title", "pt-BR");
228+
// Exactly two calls: the rejected first response plus one successful retry.
229+
expect(mockOpenAIChatCompletionCreate).toHaveBeenCalledTimes(2);
230+
expect(result.markdown).toContain(dataUrl);
231+
});
232+
114233
it("splitMarkdownIntoChunks does not split on headings inside fenced code blocks", async () => {
115234
const { splitMarkdownIntoChunks } = await import("./translateFrontMatter");
116235

0 commit comments

Comments
 (0)