@@ -92,6 +92,10 @@ You are a professional technical documentation engineer, skilled in writing high
9292<% if (terms) { %>
9393<%- terms %>
9494<% } %>
95+ <% if (isChunk) { %>
96+ ## Chunk Translation Notice
97+ This is part of a larger document that has been split into smaller chunks for translation. Please translate this chunk as if it's part of a continuous document, maintaining consistency with the overall document style and context.
98+ <% } %>
9599
96100<% if (userPrompt || additionalPrompts) { %>
97101## Additional Requirements
@@ -117,6 +121,7 @@ export interface InternalTranslateOptions extends TranslateOptions {
117121 sourceContent : string
118122 target : Language
119123 additionalPrompts ?: string
124+ isChunk ?: boolean
120125}
121126
122127const resolveTerms = async (
@@ -220,13 +225,83 @@ function getTitleTranslation(
220225 return null
221226}
222227
/**
 * Break `content` into consecutive groups of whole lines.
 *
 * The text is split on `'\n'` and lines are accumulated greedily: a group is
 * closed as soon as appending the next line would push its UTF-8 byte size
 * (each line counted together with one newline byte) above `maxChunkSize`.
 * Lines are never cut, so a single line that is larger than `maxChunkSize`
 * ends up alone in a chunk that exceeds the limit.
 *
 * Joining the returned chunks with `'\n'` yields the original `content`.
 *
 * @param content - Text to split.
 * @param maxChunkSize - Byte budget per chunk, measured in UTF-8.
 * @returns The chunks in document order; always contains at least one entry.
 */
function splitContentIntoChunks(
  content: string,
  maxChunkSize: number,
): string[] {
  const result: string[] = []
  let pendingLines: string[] = []
  let pendingBytes = 0

  for (const row of content.split('\n')) {
    // Measure the line with its terminating newline, in bytes rather than characters.
    const rowBytes = Buffer.byteLength(`${row}\n`, 'utf8')
    const wouldOverflow = pendingBytes + rowBytes > maxChunkSize

    // Close the running chunk first, but never emit an empty one: an
    // oversized line arriving on an empty buffer is simply accepted.
    if (wouldOverflow && pendingLines.length > 0) {
      result.push(pendingLines.join('\n'))
      pendingLines = []
      pendingBytes = 0
    }

    pendingLines.push(row)
    pendingBytes += rowBytes
  }

  // Flush whatever is left after the final line.
  if (pendingLines.length > 0) {
    result.push(pendingLines.join('\n'))
  }

  return result
}
258+
/**
 * Translate `options.sourceContent`, breaking it into several requests when
 * it is larger than 60 KiB (measured in UTF-8 bytes).
 *
 * Content at or below the limit is handed to `translate` unchanged. Larger
 * content is split with `splitContentIntoChunks`; each chunk is translated
 * one after another with `isChunk: true`, and the translated pieces are
 * joined with `'\n'`.
 *
 * @param options - Translation options; `sourceContent` is the text to translate.
 * @returns The translated text.
 */
export const translateWithChunks = async (
  options: InternalTranslateOptions,
): Promise<string> => {
  const maxChunkSize = 60 * 1024
  const contentSize = Buffer.byteLength(options.sourceContent, 'utf8')

  // Small documents take the plain single-request path.
  if (contentSize <= maxChunkSize) {
    return translate(options)
  }

  logger.info(
    `Content size (${Math.round(contentSize / 1024)} KB) exceeds limit, splitting into chunks...`,
  )

  const chunks = splitContentIntoChunks(options.sourceContent, maxChunkSize)
  const total = chunks.length
  logger.info(`Split content into ${total} chunks`)

  // Chunks are translated strictly in order, one request at a time.
  const translatedChunks: string[] = []
  for (const [index, chunk] of chunks.entries()) {
    logger.info(`Translating chunk ${index + 1} /${total} ...`)

    translatedChunks.push(
      await translate({
        ...options,
        sourceContent: chunk,
        isChunk: true,
      }),
    )
  }

  const result = translatedChunks.join('\n')
  logger.info(`Successfully translated ${total} chunks`)

  return result
}
296+
223297export const translate = async ( {
224298 source,
225299 sourceContent,
226300 target,
227301 systemPrompt,
228302 userPrompt = '' ,
229303 additionalPrompts = '' ,
304+ isChunk = false ,
230305} : InternalTranslateOptions ) => {
231306 if ( ! openai ) {
232307 openai = new AzureOpenAI ( {
@@ -275,6 +350,7 @@ export const translate = async ({
275350 additionalPrompts : additionalPrompts ,
276351 terms,
277352 titleTranslationPrompt,
353+ isChunk,
278354 } ,
279355 { async : true } ,
280356 )
@@ -543,7 +619,7 @@ export const translateCommand = new Command('translate')
543619 ) ,
544620 } )
545621
546- targetContent = await translate ( {
622+ targetContent = await translateWithChunks ( {
547623 ...config . translate ,
548624 source,
549625 sourceContent : normalizedSourceContent ,
0 commit comments