@@ -200,3 +200,171 @@ def finalize_output(
200200 log_callback ("docx_rebuilt" , f"DOCX document reconstructed ({ len (docx_bytes )} bytes)" )
201201
202202 return docx_bytes
203+
async def translate_content(
    self,
    raw_content: Any,
    structure_map: Dict[str, Any],
    context: Dict[str, Any],
    source_language: str,
    target_language: str,
    model_name: str,
    llm_client: Any,
    max_tokens_per_chunk: int,
    log_callback: Optional[Callable] = None,
    context_manager: Optional[Any] = None,
    max_retries: int = 1,
    prompt_options: Optional[Dict] = None,
    stats_callback: Optional[Callable] = None,
    checkpoint_manager: Optional[Any] = None,
    translation_id: Optional[str] = None,
    file_href: Optional[str] = None,
    check_interruption_callback: Optional[Callable] = None,
    resume_state: Optional[Any] = None,
    **kwargs
) -> Tuple[bytes, Any]:
    """
    Translate DOCX content with checkpoint support.

    This method bypasses the generic orchestrator and drives
    _translate_all_chunks_with_checkpoint directly, so a translation can be
    interrupted between chunks and resumed later from ``resume_state``.

    Args:
        raw_content: DOCX file path (str)
        structure_map: Not used (kept for interface compatibility)
        context: Not read; it is rebuilt from the checkpoint (resume) or
            from extract_content (fresh start) before any use
        source_language: Source language
        target_language: Target language
        model_name: Model name
        llm_client: LLM client
        max_tokens_per_chunk: Max tokens per chunk (only used on a fresh
            start; a resumed run reuses the checkpointed chunks)
        log_callback: Logging callback, called as (event_key, message)
        context_manager: Context manager
        max_retries: Max retries
        prompt_options: Prompt options
        stats_callback: Stats callback
        checkpoint_manager: Checkpoint manager for partial state
        translation_id: Translation ID for checkpointing
        file_href: File identifier for checkpointing; defaults to the
            base name of the DOCX path when empty
        check_interruption_callback: Interruption check callback
        resume_state: Resume state for partial translation
        **kwargs: Additional arguments (not used)

    Returns:
        (docx_bytes, stats). When the translation is interrupted,
        docx_bytes is ``b''`` and stats holds the progress so far.
    """
    import os

    from ..epub.xhtml_translator import _translate_all_chunks_with_checkpoint
    from ..epub.translation_metrics import TranslationMetrics

    source_path = raw_content  # DOCX file path

    # Use filename as file_href if not provided
    if not file_href:
        file_href = os.path.basename(source_path)

    if resume_state:
        # === RESUME FROM PARTIAL STATE ===
        if log_callback:
            log_callback(
                "docx_resume_partial",
                f"📂 Resuming DOCX translation from chunk "
                f"{resume_state.current_chunk_index} /{len(resume_state.chunks)} ",
            )

        # Restore state from checkpoint
        chunks = resume_state.chunks
        global_tag_map = resume_state.global_tag_map
        placeholder_format = resume_state.placeholder_format
        # Copy so the checkpoint object is not mutated while new chunks
        # are appended during this run.
        translated_chunks = resume_state.translated_chunks.copy()
        start_chunk_index = resume_state.current_chunk_index

        # Restore statistics
        if resume_state.stats:
            stats = TranslationMetrics.from_dict(resume_state.stats)
        else:
            # No saved metrics: start fresh, but record the chunk total
            # exactly as the non-resume path does.
            stats = TranslationMetrics()
            stats.total_chunks = len(chunks)

        # Point the shared tag preserver at the placeholder format that
        # was in effect when the checkpoint was written.
        tag_preserver = self.tag_preserver
        tag_preserver.placeholder_format.prefix = placeholder_format[0]
        tag_preserver.placeholder_format.suffix = placeholder_format[1]

        # Rebuild the context consumed by reconstruct/finalize
        context = {
            'metadata': resume_state.doc_metadata,
            'preserver': tag_preserver,
            'source_path': source_path,
        }
    else:
        # === NORMAL INITIALIZATION (NO RESUME) ===
        # 1. Extract content
        html_content, context = self.extract_content(source_path, log_callback)

        # 2. Preserve structure
        text_with_placeholders, global_tag_map, placeholder_format = (
            self.preserve_structure(html_content, context, log_callback)
        )

        # 3. Create chunks
        chunks = self.create_chunks(
            text_with_placeholders,
            global_tag_map,
            max_tokens_per_chunk,
            log_callback,
        )

        # Initialize variables for new translation
        translated_chunks = []
        start_chunk_index = 0
        stats = TranslationMetrics()
        stats.total_chunks = len(chunks)

    # 4. Translation with checkpoint support
    translated_chunks, stats, was_interrupted = await _translate_all_chunks_with_checkpoint(
        chunks=chunks,
        source_language=source_language,
        target_language=target_language,
        model_name=model_name,
        llm_client=llm_client,
        max_retries=max_retries,
        context_manager=context_manager,
        placeholder_format=placeholder_format,
        log_callback=log_callback,
        stats_callback=stats_callback,
        checkpoint_manager=checkpoint_manager,
        translation_id=translation_id,
        file_href=file_href,
        file_path=source_path,
        check_interruption_callback=check_interruption_callback,
        start_chunk_index=start_chunk_index,
        translated_chunks=translated_chunks,
        global_tag_map=global_tag_map,
        stats=stats,
        prompt_options=prompt_options,
    )

    # If interrupted, skip reconstruction: no document is produced.
    if was_interrupted:
        if log_callback:
            log_callback("docx_interrupted", "DOCX translation interrupted - state saved")
        # Return empty bytes to indicate incomplete translation
        return b'', stats

    # 5. Reconstruct content
    if log_callback:
        log_callback("reconstruct_start", "Reconstructing DOCX content")

    reconstructed_html = self.reconstruct_content(
        translated_chunks,
        global_tag_map,
        context,
    )

    # 6. Finalize output
    docx_bytes = self.finalize_output(
        reconstructed_html,
        source_path,
        context,
        log_callback,
    )

    return docx_bytes, stats
0 commit comments