@@ -25,6 +25,7 @@ const createParser = () =>
2525 ( sax as any ) . createStream ( true , { trim : false } , { strictEntities : true } ) ;
2626
2727async function translate ( language : string , filePath : string ) : Promise < void > {
28+ const startTime = new Date ( ) . getTime ( ) ;
2829 try {
2930 // Pipe the XML file into the parser.
3031 const input_dir = fileURLToPath (
@@ -45,6 +46,9 @@ async function translate(language: string, filePath: string): Promise<void> {
4546 console . log ( `Translation saved to ${ output_path } ` ) ;
4647 } catch ( parseErr ) {
4748 console . error ( "Error parsing XML:" , parseErr ) ;
49+ } finally {
50+ const elapsed = new Date ( ) . getTime ( ) - startTime ;
51+ console . log ( filePath + " took " + elapsed / 1000.0 + " seconds" ) ;
4852 }
4953}
5054
@@ -59,7 +63,7 @@ async function recursivelyTranslate(
5963 return await translateChunk ( ori ) ; // translate the chunk
6064 }
6165
62- let subTranslated = "" ;
66+ let subTranslated : string [ ] = [ ] ;
6367 // continue splitting the chunk
6468 // Create a SAX parser in strict mode to split source into chunks.
6569 await new Promise < void > ( ( resolve , reject ) => {
@@ -71,6 +75,10 @@ async function recursivelyTranslate(
7175 let subIsRecording = false ;
7276
7377 subParser . on ( "opentag" , node => {
78+ if ( node . name === "WRAPPER" ) {
79+ return ;
80+ }
81+
7482 subCurrentDepth ++ ;
7583
7684 // If we're at depth 2, this is the start of a new segment.
@@ -95,17 +103,14 @@ async function recursivelyTranslate(
95103 subSegments [ subSegments . length - 1 ] [ 0 ]
96104 ) {
97105 subSegments [ subSegments . length - 1 ] [ 1 ] += text ;
98- subSegments [ subSegments . length - 1 ] [ 0 ] = true ;
99- } else {
100- if (
106+ } else if (
101107 text . trim ( ) !== "" ||
102108 text . trim ( ) === "," ||
103109 text . trim ( ) === "."
104110 ) {
105111 subSegments . push ( [ false , text ] ) ;
106112 } else {
107113 subSegments . push ( [ true , text ] ) ;
108- }
109114 }
110115 }
111116 } ) ;
@@ -117,16 +122,36 @@ async function recursivelyTranslate(
117122 } ) ;
118123
119124 subParser . on ( "closetag" , tagName => {
125+ if ( tagName === "WRAPPER" ) {
126+ return ;
127+ }
128+
120129 if ( subIsRecording ) {
121130 subCurrentSegment += `</${ tagName } >` ;
122131 }
123132
124133 if ( subCurrentDepth === 2 ) {
125134 // We are closing a segment element.
126- if ( tagName === "LATEXINLINE" ) {
135+ if (
136+ tagName === "LATEXINLINE" ||
137+ tagName === "LATEX" ||
138+ tagName === "SNIPPET" ||
139+ tagName === "SCHEMEINLINE"
140+ ) {
127141 subSegments . push ( [ false , subCurrentSegment ] ) ;
128142 } else {
143+ if (
144+ subSegments . length > 0 &&
145+ subSegments [ subSegments . length - 1 ] [ 0 ] &&
146+ ( subSegments [ subSegments . length - 1 ] [ 1 ] . length +
147+ subCurrentSegment . length ) <
148+ MAXLEN
149+ ) {
150+ console . log ( "Merging segments" ) ;
151+ subSegments [ subSegments . length - 1 ] [ 1 ] += subCurrentSegment ;
152+ } else {
129153 subSegments . push ( [ true , subCurrentSegment ] ) ;
154+ }
130155 }
131156 subCurrentSegment = "" ;
132157 subIsRecording = false ;
@@ -151,20 +176,20 @@ async function recursivelyTranslate(
151176 subParser . on ( "end" , async ( ) => {
152177 for ( const segment of subSegments ) {
153178 if ( segment [ 0 ] ) {
154- subTranslated += await helper ( segment [ 1 ] , false ) ;
179+ subTranslated . push ( await helper ( segment [ 1 ] , false ) ) ;
155180 } else {
156- subTranslated += segment [ 1 ] ;
181+ subTranslated . push ( segment [ 1 ] ) ;
157182 }
158183 }
159184 resolve ( ) ;
160185 } ) ;
161186
162187 subParser . on ( "error" , reject ) ;
163188
164- Readable . from ( ori ) . pipe ( subParser ) ;
189+ Readable . from ( "<WRAPPER>" + ori + "</WRAPPER>" ) . pipe ( subParser ) ;
165190 } ) ;
166191
167- return subTranslated ;
192+ return subTranslated . join ( "" ) ;
168193 }
169194
170195 // Create a SAX parser in strict mode to split source into chunks.
@@ -173,7 +198,7 @@ async function recursivelyTranslate(
173198 // const assistant = await createAssistant(language, ai);
174199 const assistant_id = "asst_BLVYfog5DpWrbu3fW3o2oD4r" ;
175200 const thread = await ai . beta . threads . create ( ) ;
176- let translated = "" ;
201+ let translated : String [ ] = [ ] ;
177202
178203 try {
179204 await new Promise < void > ( ( resolve , reject ) => {
@@ -250,9 +275,9 @@ async function recursivelyTranslate(
250275 parser . on ( "end" , async ( ) => {
251276 for ( const segment of segments ) {
252277 if ( segment [ 0 ] ) {
253- translated += await helper ( segment [ 1 ] , false ) ;
278+ translated . push ( await helper ( segment [ 1 ] , false ) ) ;
254279 } else {
255- translated += segment [ 1 ] ;
280+ translated . push ( segment [ 1 ] ) ;
256281 }
257282 }
258283 console . log ( `Done translating all segments.` ) ;
@@ -264,14 +289,19 @@ async function recursivelyTranslate(
264289 fs . createReadStream ( path ) . pipe ( parser ) ;
265290 } ) ;
266291
267- return translated ;
292+ return translated . join ( "" ) ;
268293 } catch ( parseErr ) {
269294 console . error ( "Error parsing XML:" , parseErr ) ;
270- return translated + "<!-- Error parsing this section -->" ;
295+ return translated . join ( "" ) + "<!-- Error parsing this section -->" ;
271296 }
272297
273298 async function translateChunk ( chunk : string ) : Promise < string > {
299+ if ( chunk . trim ( ) === "" || chunk . trim ( ) === "," || chunk . trim ( ) === "." ) {
300+ return chunk ;
301+ }
302+
274303 let translatedChunk = "" ;
304+ console . log ( "Translating chunk of length: " + chunk . length + "\n" + chunk ) ;
275305
276306 try {
277307 await ai . beta . threads . messages . create ( thread . id , {
@@ -364,7 +394,10 @@ async function recursivelyTranslate(
364394 return translatedChunk ;
365395 } catch ( err ) {
366396 console . log ( `Error occured while translating ${ path } :\n ` + err ) ;
367- return translatedChunk + "<!-- Error translating this section -->" ;
397+ return (
398+ translatedChunk +
399+ `<!-- Error occured while translating this section-->\n<!-- Error: ${ err . length < 50 ? err : err . subString ( 0 , 50 ) + "..." } -->`
400+ ) ;
368401 }
369402 }
370403}
@@ -393,4 +426,4 @@ function strongEscapeXML(str: string): string {
393426 . replace ( / > / g, ">" )
394427 . replace ( / " / g, """ )
395428 . replace ( / ' / g, "'" ) ;
396- }
429+ }
0 commit comments