Skip to content

Commit 89841d6

Browse files
committed
Enhance error handling and logging in translation process, streamline file path usage
1 parent a115312 commit 89841d6

File tree

1 file changed

+128
-60
lines changed

1 file changed

+128
-60
lines changed

i18n/controllers/recurTranslate.ts

Lines changed: 128 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ import sax from "sax";
77
import { Readable } from "stream";
88
import { fileURLToPath } from "url";
99
import { isGeneratorObject } from "util/types";
10+
import { AssistantStream } from "openai/lib/AssistantStream.mjs";
1011

1112
dotenv.config();
1213

@@ -20,25 +21,57 @@ const ai = new OpenAI({
2021
baseURL: process.env.AI_BASEURL
2122
});
2223

23-
const ignoredTags = ["LATEXINLINE", "LATEX", "SNIPPET", "SCHEMEINLINE", "SCHEME", "LONG_PAGE", "LABEL"];
24+
const ignoredTags = [
25+
"LATEXINLINE",
26+
"LATEX",
27+
"SNIPPET",
28+
"SCHEMEINLINE",
29+
"SCHEME",
30+
"LONG_PAGE",
31+
"LABEL"
32+
];
2433

2534
const MAXLEN = Number(process.env.MAX_LEN) || 3000;
2635

36+
// Centralized logging to prevent duplicate messages.
// Holds one key per message/error combination that has already been printed.
const errorMessages = new Set<string>();

/**
 * Produces a stable textual description of an arbitrary thrown value, used
 * only for building de-duplication keys. Never throws.
 *
 * @param error - Any value that was thrown or reported.
 * @returns A string that differs for values with different visible content.
 */
function describeError(error: unknown): string {
  if (error instanceof Error) {
    return `${error.name}: ${error.message}`;
  }
  if (typeof error === "string") {
    return error;
  }
  try {
    // JSON distinguishes plain objects that toString() would all render as
    // "[object Object]"; it yields undefined for functions and symbols.
    return JSON.stringify(error) ?? String(error);
  } catch {
    // Circular structures and bigint values make JSON.stringify throw.
    return Object.prototype.toString.call(error);
  }
}

/**
 * Writes an error to stderr at most once per distinct message/error pair.
 *
 * @param message - Human-readable context for the failure.
 * @param error - Optional value that was thrown or reported. Any type is
 *   accepted; `undefined` and `null` are treated as "no error supplied".
 */
function logError(message: string, error?: unknown): void {
  const hasError = error !== undefined && error !== null;

  // A NUL separator keeps ("ab", "c") and ("a", "bc") from sharing a key.
  const errorKey = hasError
    ? `${message}\u0000${describeError(error)}`
    : message;

  // Only log if we haven't seen this exact combination before.
  if (errorMessages.has(errorKey)) {
    return;
  }
  errorMessages.add(errorKey);

  if (hasError) {
    console.error(message, error);
  } else {
    console.error(message);
  }
}
51+
2752
// Factory returning a new sax stream parser on every call; the first argument
// (`true`) requests strict mode and `trim: false` is passed as the options.
// NOTE(review): `{ strictEntities: true }` is passed as a separate third
// argument. sax's createStream is documented as (strict, options), so this
// setting may be silently ignored — confirm, and merge it into the options
// object if strict entity handling is actually intended.
const createParser = () =>
2853
(sax as any).createStream(true, { trim: false }, { strictEntities: true });
2954

3055
async function translate(language: string, filePath: string): Promise<void> {
3156
const startTime = new Date().getTime();
57+
let assistant;
58+
3259
try {
33-
// Pipe the XML file into the parser.
34-
const input_dir = fileURLToPath(
35-
import.meta.resolve("../../xml/en" + filePath)
36-
);
60+
// Use the provided file path directly without modification
61+
const input_path = filePath;
62+
63+
assistant = await createAssistant(language, ai as any);
3764

38-
const translated: string = await recursivelyTranslate(language, input_dir);
65+
// Generate output path by replacing "/en/" with "/cn/" in the path
66+
const output_path = filePath.replace(
67+
path.sep + "en" + path.sep,
68+
path.sep + "cn" + path.sep
69+
);
3970

40-
const output_path = fileURLToPath(
41-
import.meta.resolve("../../xml/cn" + filePath)
71+
const translated: string = await recursivelyTranslate(
72+
language,
73+
input_path,
74+
assistant.id
4275
);
4376

4477
// Ensure directory exists
@@ -48,17 +81,23 @@ async function translate(language: string, filePath: string): Promise<void> {
4881
fs.writeFileSync(output_path, translated);
4982
console.log(`Translation saved to ${output_path}`);
5083
} catch (parseErr) {
51-
console.error("Error parsing XML:", parseErr);
84+
logError(`Error translating file ${filePath}:`, parseErr);
5285
} finally {
86+
if (assistant) {
87+
await ai.beta.assistants.del(assistant.id).catch(err => {
88+
logError(`Error deleting assistant:`, err);
89+
});
90+
}
5391
const elapsed = new Date().getTime() - startTime;
5492
console.log(filePath + " took " + elapsed / 1000.0 + " seconds");
5593
}
5694
}
5795

58-
// TODO: change the toTranslate to a file path, read the file and translate the content
96+
// Function to translate the content of a file
5997
async function recursivelyTranslate(
6098
language: string,
61-
path: string
99+
filePath: string,
100+
assistant_id: string
62101
): Promise<string> {
63102
// Recursive function to split and translate
64103
async function helper(ori: string): Promise<string> {
@@ -124,14 +163,12 @@ async function recursivelyTranslate(
124163
if (tagName === "WRAPPER") {
125164
return;
126165
}
127-
166+
128167
subCurrentSegment += `</${tagName}>`;
129168

130169
if (subCurrentDepth === 2) {
131170
// We are closing a segment element.
132-
if (
133-
ignoredTags.includes(tagName)
134-
) {
171+
if (ignoredTags.includes(tagName)) {
135172
subSegments.push([false, subCurrentSegment]);
136173
} else if (
137174
subSegments.length > 0 &&
@@ -147,12 +184,12 @@ async function recursivelyTranslate(
147184
subCurrentSegment = "";
148185
subIsRecording = false;
149186
}
150-
187+
151188
if (subCurrentDepth === 1) {
152-
subSegments.push([false, `</${tagName}>`])
189+
subSegments.push([false, `</${tagName}>`]);
153190
subCurrentSegment = "";
154191
}
155-
192+
156193
subCurrentDepth--;
157194
});
158195

@@ -166,16 +203,34 @@ async function recursivelyTranslate(
166203

167204
subParser.on("end", async () => {
168205
for (const segment of subSegments) {
169-
if (segment[0]) {
170-
subTranslated.push(await helper(segment[1]));
171-
} else {
172-
subTranslated.push(segment[1]);
206+
try {
207+
if (segment[0]) {
208+
subTranslated.push(await helper(segment[1]));
209+
} else {
210+
subTranslated.push(segment[1]);
211+
}
212+
} catch (error) {
213+
logError(`Error translating segment in ${filePath}:`, error);
214+
// Add error comment and continue with next segment
215+
subTranslated.push(
216+
segment[1] + `<!-- Error translating this segment -->`
217+
);
173218
}
174219
}
175220
resolve();
176221
});
177222

178-
subParser.on("error", reject);
223+
subParser.on("error", err => {
224+
logError(`Error in subParser for ${filePath}:`, err);
225+
// Try to recover and continue
226+
try {
227+
subParser._parser.error = null;
228+
subParser._parser.resume();
229+
} catch (resumeErr) {
230+
logError(`Could not recover from parser error:`, resumeErr);
231+
reject(err);
232+
}
233+
});
179234

180235
Readable.from("<WRAPPER>" + ori + "</WRAPPER>").pipe(subParser);
181236
});
@@ -186,14 +241,12 @@ async function recursivelyTranslate(
186241
// Create a SAX parser in strict mode to split source into chunks.
187242
const parser = createParser();
188243

189-
// const assistant = await createAssistant(language, ai as any);
190-
const assistant_id = "asst_BLVYfog5DpWrbu3fW3o2oD4r";
191244
const thread = await ai.beta.threads.create();
192245
let translated: String[] = [];
193246

194247
try {
195248
await new Promise<void>((resolve, reject) => {
196-
console.log("Translating " + path + " at " + thread.id);
249+
console.log("Translating " + filePath + " at " + thread.id);
197250
// Variables to track current depth and segments.
198251
let currentDepth = 0;
199252
let currentSegment = "";
@@ -278,36 +331,51 @@ async function recursivelyTranslate(
278331

279332
parser.on("end", async () => {
280333
for (const segment of segments) {
281-
if (segment[0]) {
282-
translated.push(await helper(segment[1]));
283-
} else {
284-
translated.push(segment[1]);
334+
try {
335+
if (segment[0]) {
336+
translated.push(await helper(segment[1]));
337+
} else {
338+
translated.push(segment[1]);
339+
}
340+
} catch (error) {
341+
logError(`Error translating segment in ${filePath}:`, error);
342+
// Add error comment and continue with next segment
343+
translated.push(
344+
segment[1] + `<!-- Error translating this section -->`
345+
);
285346
}
286347
}
287348
resolve();
288349
});
289350

290-
parser.on("error", reject);
351+
parser.on("error", err => {
352+
logError(`Parser error in ${filePath}:`, err);
353+
// Try to recover and continue
354+
try {
355+
parser._parser.error = null;
356+
parser._parser.resume();
357+
} catch (resumeErr) {
358+
logError(`Could not recover from parser error:`, resumeErr);
359+
reject(err);
360+
}
361+
});
291362

292-
fs.createReadStream(path).pipe(parser);
363+
// Use the file path directly without modification
364+
fs.createReadStream(filePath).pipe(parser);
293365
});
294366

295367
return translated.join("");
296368
} catch (parseErr) {
297-
console.error("Error parsing XML:", parseErr);
298-
return translated.join("") + "<!-- Error parsing this section -->";
369+
logError(`Error parsing XML in ${filePath}:`, parseErr);
370+
// Return what we have so far plus error comment
371+
return translated.join("") + `<!-- Error parsing this file -->`;
299372
}
300373

301374
async function translateChunk(chunk: string): Promise<string> {
302375
if (chunk.trim() === "" || chunk.trim() === "," || chunk.trim() === ".") {
303376
return chunk;
304377
}
305378

306-
// console.log("Translating chunk of length: " + chunk.length);
307-
// if (chunk.length < 100) {
308-
// console.log("\nchunk: " + chunk);
309-
// }
310-
311379
let translatedChunk = "";
312380

313381
try {
@@ -330,7 +398,7 @@ async function recursivelyTranslate(
330398
});
331399

332400
const message = messages.data.pop()!;
333-
const messageContent = message.content[0];
401+
const messageContent = message?.content[0];
334402

335403
if (messageContent.type !== "text") {
336404
throw new Error(
@@ -341,7 +409,6 @@ async function recursivelyTranslate(
341409
const text = messageContent.text;
342410

343411
const safeText = escapeXML(text.value);
344-
// const safeText = chunk;
345412
const textStream = Readable.from("<WRAPPER>" + safeText + "</WRAPPER>");
346413

347414
await new Promise<void>((resolve, reject) => {
@@ -359,13 +426,21 @@ async function recursivelyTranslate(
359426

360427
clean.on("opentag", node => {
361428
currDepth++;
362-
if (node.name != "WRAPPER" && node.name != "TRANSLATE") {
429+
if (
430+
node.name != "WRAPPER" &&
431+
node.name != "TRANSLATE" &&
432+
!ignoredTags.includes(node.name)
433+
) {
363434
translatedChunk += `<${node.name}${formatAttributes(node.attributes)}>`;
364435
}
365436
});
366437

367438
clean.on("closetag", tagName => {
368-
if (tagName != "WRAPPER" && tagName != "TRANSLATE") {
439+
if (
440+
tagName != "WRAPPER" &&
441+
tagName != "TRANSLATE" &&
442+
!ignoredTags.includes(tagName)
443+
) {
369444
translatedChunk += `</${tagName}>`;
370445
}
371446
currDepth--;
@@ -380,24 +455,19 @@ async function recursivelyTranslate(
380455
});
381456

382457
clean.on("error", error => {
383-
console.log(
384-
"error encountered when validating XML: " +
385-
error +
386-
"\nfile: " +
387-
path +
388-
"\n section: " +
389-
safeText +
390-
"\n original text: " +
391-
chunk
392-
);
458+
// Log only once with abbreviated content
459+
logError(`XML validation error in ${filePath}`, error);
393460

394461
// Attempt to recover using the internal parser
395462
try {
396463
clean._parser.error = null;
397464
clean._parser.resume();
465+
// Continue processing despite the error
466+
resolve();
398467
} catch (e) {
399-
console.log("Failed to resume parser:", e);
400-
reject(e);
468+
// Add error comment and resolve instead of rejecting
469+
translatedChunk += `<!-- XML validation error -->`;
470+
resolve();
401471
}
402472
});
403473

@@ -408,11 +478,9 @@ async function recursivelyTranslate(
408478

409479
return translatedChunk;
410480
} catch (err) {
411-
console.log(`Error occured while translating ${path}:\n ` + err);
412-
return (
413-
translatedChunk +
414-
`<!-- Error occured while translating this section-->\n<!-- Error: ${err}-->`
415-
);
481+
logError(`Error occurred while translating chunk in ${filePath}:`, err);
482+
// Return the original chunk with error comment rather than throwing
483+
return chunk + `<!-- Error occurred while translating this section -->`;
416484
}
417485
}
418486
}

0 commit comments

Comments
 (0)