@@ -7,6 +7,7 @@ import sax from "sax";
7
7
import { Readable } from "stream" ;
8
8
import { fileURLToPath } from "url" ;
9
9
import { isGeneratorObject } from "util/types" ;
10
+ import { AssistantStream } from "openai/lib/AssistantStream.mjs" ;
10
11
11
12
dotenv . config ( ) ;
12
13
@@ -20,25 +21,57 @@ const ai = new OpenAI({
20
21
baseURL : process . env . AI_BASEURL
21
22
} ) ;
22
23
23
- const ignoredTags = [ "LATEXINLINE" , "LATEX" , "SNIPPET" , "SCHEMEINLINE" , "SCHEME" , "LONG_PAGE" , "LABEL" ] ;
24
/**
 * Tag names that the translation pipeline treats as non-translatable.
 *
 * When a segment element with one of these names is closed, the segment is
 * recorded with its "translate" flag set to false and is later appended to
 * the output verbatim. The chunk clean-up parser also skips re-emitting
 * opening and closing tags with these names.
 *
 * Declared read-only because it is a shared lookup table that is only ever
 * queried with `.includes(...)`.
 */
const ignoredTags: readonly string[] = [
  // LaTeX markup
  "LATEXINLINE",
  "LATEX",
  // Program snippets and Scheme code
  "SNIPPET",
  "SCHEMEINLINE",
  "SCHEME",
  // Layout / cross-reference markers
  "LONG_PAGE",
  "LABEL"
];
24
33
25
34
// Numeric limit read from the MAX_LEN environment variable. Falls back to 3000
// when the variable is unset, not a number, or 0 (all of which are falsy after
// Number(...)). NOTE(review): presumably a maximum chunk length in characters;
// the code that consumes it is not visible in this excerpt - confirm.
const MAXLEN = Number ( process . env . MAX_LEN ) || 3000 ;
26
35
36
// Centralized logging to prevent duplicate messages.
// Holds one key per (message, error text) pair already written to stderr.
const errorMessages = new Set<string>();

/**
 * Converts an arbitrary thrown value to text for use in the dedupe key.
 * Falls back to the generic object tag when the value cannot be stringified
 * (for example an object created with a null prototype).
 */
function describeError(error: unknown): string {
  try {
    return String(error);
  } catch {
    return Object.prototype.toString.call(error);
  }
}

/**
 * Writes an error to stderr at most once per distinct message/error pair.
 *
 * Two calls are considered duplicates when both the message and the string
 * form of the error are equal; the error object's identity is not compared.
 *
 * @param message Human-readable context for the failure.
 * @param error Optional thrown value. `undefined` and `null` are treated as
 *   "no error"; any other value, including falsy ones such as `0` or `""`,
 *   is passed through to `console.error`.
 */
function logError(message: string, error?: unknown): void {
  const hasError = error !== undefined && error !== null;

  // Encode the pair as JSON so that ("ab", "c") and ("a", "bc") produce
  // different keys, which plain concatenation would not.
  const errorKey = JSON.stringify([
    message,
    hasError ? describeError(error) : null
  ]);

  // Only log if this exact pair has not been seen before.
  if (errorMessages.has(errorKey)) {
    return;
  }
  errorMessages.add(errorKey);

  if (hasError) {
    console.error(message, error);
  } else {
    console.error(message);
  }
}
51
+
27
52
/**
 * Creates a new sax stream parser in strict mode.
 *
 * `sax.createStream` accepts exactly two arguments, `(strict, options)`.
 * `strictEntities` therefore has to live in the same options object as
 * `trim`; passed as a third argument it is ignored and the option never
 * takes effect.
 *
 * With `strictEntities` enabled only the predefined XML entities are
 * recognised, so any other named entity in the input is reported through the
 * parser's "error" event.
 *
 * @returns A writable sax stream that emits parser events.
 */
const createParser = () =>
  // NOTE(review): the `any` cast is kept from the original; drop it if the
  // installed sax typings already declare `strictEntities` - confirm.
  (sax as any).createStream(true, { trim: false, strictEntities: true });
29
54
30
55
async function translate ( language : string , filePath : string ) : Promise < void > {
31
56
const startTime = new Date ( ) . getTime ( ) ;
57
+ let assistant ;
58
+
32
59
try {
33
- // Pipe the XML file into the parser.
34
- const input_dir = fileURLToPath (
35
- import . meta . resolve ( "../../xml/en" + filePath )
36
- ) ;
60
+ // Use the provided file path directly without modification
61
+ const input_path = filePath ;
62
+
63
+ assistant = await createAssistant ( language , ai as any ) ;
37
64
38
- const translated : string = await recursivelyTranslate ( language , input_dir ) ;
65
+ // Generate output path by replacing "/en/" with "/cn/" in the path
66
+ const output_path = filePath . replace (
67
+ path . sep + "en" + path . sep ,
68
+ path . sep + "cn" + path . sep
69
+ ) ;
39
70
40
- const output_path = fileURLToPath (
41
- import . meta. resolve ( "../../xml/cn" + filePath )
71
+ const translated : string = await recursivelyTranslate (
72
+ language ,
73
+ input_path ,
74
+ assistant . id
42
75
) ;
43
76
44
77
// Ensure directory exists
@@ -48,17 +81,23 @@ async function translate(language: string, filePath: string): Promise<void> {
48
81
fs . writeFileSync ( output_path , translated ) ;
49
82
console . log ( `Translation saved to ${ output_path } ` ) ;
50
83
} catch ( parseErr ) {
51
- console . error ( " Error parsing XML:" , parseErr ) ;
84
+ logError ( ` Error translating file ${ filePath } :` , parseErr ) ;
52
85
} finally {
86
+ if ( assistant ) {
87
+ await ai . beta . assistants . del ( assistant . id ) . catch ( err => {
88
+ logError ( `Error deleting assistant:` , err ) ;
89
+ } ) ;
90
+ }
53
91
const elapsed = new Date ( ) . getTime ( ) - startTime ;
54
92
console . log ( filePath + " took " + elapsed / 1000.0 + " seconds" ) ;
55
93
}
56
94
}
57
95
58
- // TODO: change the toTranslate to a file path, read the file and translate the content
96
+ // Function to translate the content of a file
59
97
async function recursivelyTranslate (
60
98
language : string ,
61
- path : string
99
+ filePath : string ,
100
+ assistant_id : string
62
101
) : Promise < string > {
63
102
// Recursive function to split and translate
64
103
async function helper ( ori : string ) : Promise < string > {
@@ -124,14 +163,12 @@ async function recursivelyTranslate(
124
163
if ( tagName === "WRAPPER" ) {
125
164
return ;
126
165
}
127
-
166
+
128
167
subCurrentSegment += `</${ tagName } >` ;
129
168
130
169
if ( subCurrentDepth === 2 ) {
131
170
// We are closing a segment element.
132
- if (
133
- ignoredTags . includes ( tagName )
134
- ) {
171
+ if ( ignoredTags . includes ( tagName ) ) {
135
172
subSegments . push ( [ false , subCurrentSegment ] ) ;
136
173
} else if (
137
174
subSegments . length > 0 &&
@@ -147,12 +184,12 @@ async function recursivelyTranslate(
147
184
subCurrentSegment = "" ;
148
185
subIsRecording = false ;
149
186
}
150
-
187
+
151
188
if ( subCurrentDepth === 1 ) {
152
- subSegments . push ( [ false , `</${ tagName } >` ] )
189
+ subSegments . push ( [ false , `</${ tagName } >` ] ) ;
153
190
subCurrentSegment = "" ;
154
191
}
155
-
192
+
156
193
subCurrentDepth -- ;
157
194
} ) ;
158
195
@@ -166,16 +203,34 @@ async function recursivelyTranslate(
166
203
167
204
subParser . on ( "end" , async ( ) => {
168
205
for ( const segment of subSegments ) {
169
- if ( segment [ 0 ] ) {
170
- subTranslated . push ( await helper ( segment [ 1 ] ) ) ;
171
- } else {
172
- subTranslated . push ( segment [ 1 ] ) ;
206
+ try {
207
+ if ( segment [ 0 ] ) {
208
+ subTranslated . push ( await helper ( segment [ 1 ] ) ) ;
209
+ } else {
210
+ subTranslated . push ( segment [ 1 ] ) ;
211
+ }
212
+ } catch ( error ) {
213
+ logError ( `Error translating segment in ${ filePath } :` , error ) ;
214
+ // Add error comment and continue with next segment
215
+ subTranslated . push (
216
+ segment [ 1 ] + `<!-- Error translating this segment -->`
217
+ ) ;
173
218
}
174
219
}
175
220
resolve ( ) ;
176
221
} ) ;
177
222
178
- subParser . on ( "error" , reject ) ;
223
+ subParser . on ( "error" , err => {
224
+ logError ( `Error in subParser for ${ filePath } :` , err ) ;
225
+ // Try to recover and continue
226
+ try {
227
+ subParser . _parser . error = null ;
228
+ subParser . _parser . resume ( ) ;
229
+ } catch ( resumeErr ) {
230
+ logError ( `Could not recover from parser error:` , resumeErr ) ;
231
+ reject ( err ) ;
232
+ }
233
+ } ) ;
179
234
180
235
Readable . from ( "<WRAPPER>" + ori + "</WRAPPER>" ) . pipe ( subParser ) ;
181
236
} ) ;
@@ -186,14 +241,12 @@ async function recursivelyTranslate(
186
241
// Create a SAX parser in strict mode to split source into chunks.
187
242
const parser = createParser ( ) ;
188
243
189
- // const assistant = await createAssistant(language, ai as any);
190
- const assistant_id = "asst_BLVYfog5DpWrbu3fW3o2oD4r" ;
191
244
const thread = await ai . beta . threads . create ( ) ;
192
245
let translated : String [ ] = [ ] ;
193
246
194
247
try {
195
248
await new Promise < void > ( ( resolve , reject ) => {
196
- console . log ( "Translating " + path + " at " + thread . id ) ;
249
+ console . log ( "Translating " + filePath + " at " + thread . id ) ;
197
250
// Variables to track current depth and segments.
198
251
let currentDepth = 0 ;
199
252
let currentSegment = "" ;
@@ -278,36 +331,51 @@ async function recursivelyTranslate(
278
331
279
332
parser . on ( "end" , async ( ) => {
280
333
for ( const segment of segments ) {
281
- if ( segment [ 0 ] ) {
282
- translated . push ( await helper ( segment [ 1 ] ) ) ;
283
- } else {
284
- translated . push ( segment [ 1 ] ) ;
334
+ try {
335
+ if ( segment [ 0 ] ) {
336
+ translated . push ( await helper ( segment [ 1 ] ) ) ;
337
+ } else {
338
+ translated . push ( segment [ 1 ] ) ;
339
+ }
340
+ } catch ( error ) {
341
+ logError ( `Error translating segment in ${ filePath } :` , error ) ;
342
+ // Add error comment and continue with next segment
343
+ translated . push (
344
+ segment [ 1 ] + `<!-- Error translating this section -->`
345
+ ) ;
285
346
}
286
347
}
287
348
resolve ( ) ;
288
349
} ) ;
289
350
290
- parser . on ( "error" , reject ) ;
351
+ parser . on ( "error" , err => {
352
+ logError ( `Parser error in ${ filePath } :` , err ) ;
353
+ // Try to recover and continue
354
+ try {
355
+ parser . _parser . error = null ;
356
+ parser . _parser . resume ( ) ;
357
+ } catch ( resumeErr ) {
358
+ logError ( `Could not recover from parser error:` , resumeErr ) ;
359
+ reject ( err ) ;
360
+ }
361
+ } ) ;
291
362
292
- fs . createReadStream ( path ) . pipe ( parser ) ;
363
+ // Use the file path directly without modification
364
+ fs . createReadStream ( filePath ) . pipe ( parser ) ;
293
365
} ) ;
294
366
295
367
return translated . join ( "" ) ;
296
368
} catch ( parseErr ) {
297
- console . error ( "Error parsing XML:" , parseErr ) ;
298
- return translated . join ( "" ) + "<!-- Error parsing this section -->" ;
369
+ logError ( `Error parsing XML in ${ filePath } :` , parseErr ) ;
370
+ // Return what we have so far plus error comment
371
+ return translated . join ( "" ) + `<!-- Error parsing this file -->` ;
299
372
}
300
373
301
374
async function translateChunk ( chunk : string ) : Promise < string > {
302
375
if ( chunk . trim ( ) === "" || chunk . trim ( ) === "," || chunk . trim ( ) === "." ) {
303
376
return chunk ;
304
377
}
305
378
306
- // console.log("Translating chunk of length: " + chunk.length);
307
- // if (chunk.length < 100) {
308
- // console.log("\nchunk: " + chunk);
309
- // }
310
-
311
379
let translatedChunk = "" ;
312
380
313
381
try {
@@ -330,7 +398,7 @@ async function recursivelyTranslate(
330
398
} ) ;
331
399
332
400
const message = messages . data . pop ( ) ! ;
333
- const messageContent = message . content [ 0 ] ;
401
+ const messageContent = message ? .content [ 0 ] ;
334
402
335
403
if ( messageContent . type !== "text" ) {
336
404
throw new Error (
@@ -341,7 +409,6 @@ async function recursivelyTranslate(
341
409
const text = messageContent . text ;
342
410
343
411
const safeText = escapeXML ( text . value ) ;
344
- // const safeText = chunk;
345
412
const textStream = Readable . from ( "<WRAPPER>" + safeText + "</WRAPPER>" ) ;
346
413
347
414
await new Promise < void > ( ( resolve , reject ) => {
@@ -359,13 +426,21 @@ async function recursivelyTranslate(
359
426
360
427
clean . on ( "opentag" , node => {
361
428
currDepth ++ ;
362
- if ( node . name != "WRAPPER" && node . name != "TRANSLATE" ) {
429
+ if (
430
+ node . name != "WRAPPER" &&
431
+ node . name != "TRANSLATE" &&
432
+ ! ignoredTags . includes ( node . name )
433
+ ) {
363
434
translatedChunk += `<${ node . name } ${ formatAttributes ( node . attributes ) } >` ;
364
435
}
365
436
} ) ;
366
437
367
438
clean . on ( "closetag" , tagName => {
368
- if ( tagName != "WRAPPER" && tagName != "TRANSLATE" ) {
439
+ if (
440
+ tagName != "WRAPPER" &&
441
+ tagName != "TRANSLATE" &&
442
+ ! ignoredTags . includes ( tagName )
443
+ ) {
369
444
translatedChunk += `</${ tagName } >` ;
370
445
}
371
446
currDepth -- ;
@@ -380,24 +455,19 @@ async function recursivelyTranslate(
380
455
} ) ;
381
456
382
457
clean . on ( "error" , error => {
383
- console . log (
384
- "error encountered when validating XML: " +
385
- error +
386
- "\nfile: " +
387
- path +
388
- "\n section: " +
389
- safeText +
390
- "\n original text: " +
391
- chunk
392
- ) ;
458
+ // Log only once with abbreviated content
459
+ logError ( `XML validation error in ${ filePath } ` , error ) ;
393
460
394
461
// Attempt to recover using the internal parser
395
462
try {
396
463
clean . _parser . error = null ;
397
464
clean . _parser . resume ( ) ;
465
+ // Continue processing despite the error
466
+ resolve ( ) ;
398
467
} catch ( e ) {
399
- console . log ( "Failed to resume parser:" , e ) ;
400
- reject ( e ) ;
468
+ // Add error comment and resolve instead of rejecting
469
+ translatedChunk += `<!-- XML validation error -->` ;
470
+ resolve ( ) ;
401
471
}
402
472
} ) ;
403
473
@@ -408,11 +478,9 @@ async function recursivelyTranslate(
408
478
409
479
return translatedChunk ;
410
480
} catch ( err ) {
411
- console . log ( `Error occured while translating ${ path } :\n ` + err ) ;
412
- return (
413
- translatedChunk +
414
- `<!-- Error occured while translating this section-->\n<!-- Error: ${ err } -->`
415
- ) ;
481
+ logError ( `Error occurred while translating chunk in ${ filePath } :` , err ) ;
482
+ // Return the original chunk with error comment rather than throwing
483
+ return chunk + `<!-- Error occurred while translating this section -->` ;
416
484
}
417
485
}
418
486
}
0 commit comments