Skip to content

Commit f247d51

Browse files
committed
.
1 parent d917c55 commit f247d51

File tree

5 files changed

+226
-8
lines changed

5 files changed

+226
-8
lines changed

src/core/adapters/translate_file.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,12 @@ async def translate_file(
163163
stats_callback=stats_callback,
164164
prompt_options=prompt_options,
165165
max_retries=1,
166-
context_manager=None
166+
context_manager=None,
167+
check_interruption_callback=check_interruption_callback,
168+
checkpoint_manager=checkpoint_manager,
169+
translation_id=translation_id
167170
)
168-
return result['success']
171+
return result.get('success', False)
169172

170173
# Map file extensions to adapters
171174
adapter_map = {

src/core/common/translation_orchestrator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,8 @@ async def translate(
299299
context_manager=context_manager,
300300
placeholder_format=placeholder_format,
301301
log_callback=log_callback,
302-
stats_callback=stats_callback
302+
stats_callback=stats_callback,
303+
check_interruption_callback=check_interruption_callback
303304
)
304305

305306
# 5. Refinement (optional)

src/core/docx/docx_translation_adapter.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,3 +200,171 @@ def finalize_output(
200200
log_callback("docx_rebuilt", f"DOCX document reconstructed ({len(docx_bytes)} bytes)")
201201

202202
return docx_bytes
203+
204+
async def translate_content(
    self,
    raw_content: Any,
    structure_map: Dict[str, Any],
    context: Dict[str, Any],
    source_language: str,
    target_language: str,
    model_name: str,
    llm_client: Any,
    max_tokens_per_chunk: int,
    log_callback: Optional[Callable] = None,
    context_manager: Optional[Any] = None,
    max_retries: int = 1,
    prompt_options: Optional[Dict] = None,
    stats_callback: Optional[Callable] = None,
    checkpoint_manager: Optional[Any] = None,
    translation_id: Optional[str] = None,
    file_href: Optional[str] = None,
    check_interruption_callback: Optional[Callable] = None,
    resume_state: Optional[Any] = None,
    **kwargs
) -> Tuple[bytes, Any]:
    """
    Translate a DOCX document with chunk-level checkpoint support.

    Instead of delegating to the generic orchestrator, this calls
    _translate_all_chunks_with_checkpoint directly so the translation can be
    interrupted between chunks and later resumed from saved partial state.

    Args:
        raw_content: Path to the source DOCX file (str).
        structure_map: Unused; present only for interface compatibility.
        context: Context dict carrying preservation info.
        source_language: Language to translate from.
        target_language: Language to translate to.
        model_name: Name of the LLM model.
        llm_client: LLM client instance.
        max_tokens_per_chunk: Upper bound on tokens per chunk.
        log_callback: Optional progress-logging callback.
        context_manager: Optional adaptive context manager.
        max_retries: Maximum retries per chunk.
        prompt_options: Optional prompt options (refinement, etc.).
        stats_callback: Optional per-chunk statistics callback.
        checkpoint_manager: Optional manager persisting partial state.
        translation_id: Identifier used for checkpointing.
        file_href: File identifier for checkpointing; defaults to the
            DOCX filename when not supplied.
        check_interruption_callback: Optional callback polled for interruption.
        resume_state: Optional saved state to resume a partial translation.
        **kwargs: Ignored extra arguments.

    Returns:
        Tuple of (docx_bytes, stats). docx_bytes is b'' when the run was
        interrupted before completion (partial state is checkpointed).
    """
    import os

    from ..epub.xhtml_translator import _translate_all_chunks_with_checkpoint
    from ..epub.translation_metrics import TranslationMetrics

    source_path = raw_content  # path to the DOCX on disk

    # Fall back to the bare filename as the checkpoint identifier.
    file_href = file_href or os.path.basename(source_path)

    if resume_state:
        # --- Resume from a previously checkpointed partial translation ---
        if log_callback:
            log_callback(
                "docx_resume_partial",
                f"📂 Resuming DOCX translation from chunk {resume_state.current_chunk_index}/{len(resume_state.chunks)}")

        # Rehydrate chunking state exactly as it was saved.
        chunks = resume_state.chunks
        global_tag_map = resume_state.global_tag_map
        placeholder_format = resume_state.placeholder_format
        translated_chunks = resume_state.translated_chunks.copy()
        start_chunk_index = resume_state.current_chunk_index
        html_content = resume_state.original_body_html

        # Rehydrate metrics (fresh object when nothing was recorded).
        if resume_state.stats:
            stats = TranslationMetrics.from_dict(resume_state.stats)
        else:
            stats = TranslationMetrics()

        # Re-apply the saved placeholder prefix/suffix to the tag preserver.
        tag_preserver = self.tag_preserver
        tag_preserver.placeholder_format.prefix = placeholder_format[0]
        tag_preserver.placeholder_format.suffix = placeholder_format[1]

        # Rebuild the context dict from the checkpointed metadata.
        metadata = resume_state.doc_metadata
        context = {
            'metadata': metadata,
            'preserver': tag_preserver,
            'source_path': source_path
        }

    else:
        # --- Fresh translation: run the normal extraction pipeline ---
        # 1. Pull translatable HTML out of the DOCX.
        html_content, context = self.extract_content(source_path, log_callback)

        # 2. Replace structural markup with placeholders.
        text_with_placeholders, global_tag_map, placeholder_format = \
            self.preserve_structure(html_content, context, log_callback)

        # 3. Split the placeholder text into token-bounded chunks.
        chunks = self.create_chunks(
            text_with_placeholders,
            global_tag_map,
            max_tokens_per_chunk,
            log_callback
        )

        # Fresh bookkeeping: nothing translated yet, start at chunk 0.
        translated_chunks = []
        start_chunk_index = 0
        stats = TranslationMetrics()
        stats.total_chunks = len(chunks)
        tag_preserver = self.tag_preserver
        metadata = context['metadata']

    # 4. Translate every remaining chunk, checkpointing as we go.
    translated_chunks, stats, was_interrupted = await _translate_all_chunks_with_checkpoint(
        chunks=chunks,
        source_language=source_language,
        target_language=target_language,
        model_name=model_name,
        llm_client=llm_client,
        max_retries=max_retries,
        context_manager=context_manager,
        placeholder_format=placeholder_format,
        log_callback=log_callback,
        stats_callback=stats_callback,
        checkpoint_manager=checkpoint_manager,
        translation_id=translation_id,
        file_href=file_href,
        file_path=source_path,
        check_interruption_callback=check_interruption_callback,
        start_chunk_index=start_chunk_index,
        translated_chunks=translated_chunks,
        global_tag_map=global_tag_map,
        stats=stats,
        prompt_options=prompt_options,
    )

    # Interrupted mid-run: state is already checkpointed, signal the caller
    # with empty bytes so it knows the output is incomplete.
    if was_interrupted:
        if log_callback:
            log_callback("docx_interrupted", "DOCX translation interrupted - state saved")
        return b'', stats

    # 5. Re-insert translated text into the preserved structure.
    if log_callback:
        log_callback("reconstruct_start", "Reconstructing DOCX content")

    reconstructed_html = self.reconstruct_content(
        translated_chunks,
        global_tag_map,
        context
    )

    # 6. Rebuild the binary DOCX from the reconstructed HTML.
    docx_bytes = self.finalize_output(
        reconstructed_html,
        source_path,
        context,
        log_callback
    )

    return docx_bytes, stats

src/core/docx/translator.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ async def translate_docx_file(
2323
prompt_options: Optional[Dict] = None,
2424
max_retries: int = 1,
2525
context_manager: Optional[Any] = None,
26+
check_interruption_callback: Optional[Callable] = None,
27+
checkpoint_manager: Optional[Any] = None,
28+
translation_id: Optional[str] = None,
2629
**kwargs
2730
) -> Dict[str, Any]:
2831
"""
@@ -45,20 +48,36 @@ async def translate_docx_file(
4548
model_name: LLM model name
4649
llm_client: LLM client instance
4750
max_tokens_per_chunk: Max tokens per chunk
48-
log_callback: Logging callback function stats_callback: Statistics callback function (called after each chunk)
51+
log_callback: Logging callback function
52+
stats_callback: Statistics callback function (called after each chunk)
4953
prompt_options: Prompt options (refinement, etc.)
5054
max_retries: Max translation retries
5155
context_manager: Adaptive context manager (optional)
56+
check_interruption_callback: Callback to check for interruption (optional)
57+
checkpoint_manager: Checkpoint manager for partial state (optional)
58+
translation_id: Translation ID for checkpointing (optional)
5259
**kwargs: Additional arguments
5360
5461
Returns:
5562
Dict with success, stats, output_path
5663
"""
64+
import os
65+
5766
# Create adapter and orchestrator
5867
adapter = DocxTranslationAdapter()
5968
orchestrator = GenericTranslationOrchestrator(adapter)
6069

61-
# Translate using generic pipeline
70+
# Use filename as file_href for checkpointing
71+
file_href = os.path.basename(input_filepath)
72+
73+
# Check for resume state
74+
resume_state = None
75+
if checkpoint_manager and translation_id:
76+
resume_state = checkpoint_manager.load_xhtml_partial_state(translation_id, file_href)
77+
if resume_state and log_callback:
78+
log_callback("docx_resume_detected", f"Found checkpoint at chunk {resume_state.current_chunk_index}")
79+
80+
# Translate using generic pipeline with checkpoint support
6281
docx_bytes, stats = await orchestrator.translate(
6382
source=input_filepath,
6483
source_language=source_language,
@@ -70,9 +89,25 @@ async def translate_docx_file(
7089
context_manager=context_manager,
7190
max_retries=max_retries,
7291
prompt_options=prompt_options,
73-
stats_callback=stats_callback
92+
stats_callback=stats_callback,
93+
check_interruption_callback=check_interruption_callback,
94+
checkpoint_manager=checkpoint_manager,
95+
translation_id=translation_id,
96+
file_href=file_href,
97+
resume_state=resume_state
7498
)
7599

100+
# Check if translation was interrupted
101+
if not docx_bytes:
102+
if log_callback:
103+
log_callback("docx_incomplete", "DOCX translation interrupted - partial state saved")
104+
return {
105+
'success': False,
106+
'stats': stats.to_dict(),
107+
'output_path': None,
108+
'interrupted': True
109+
}
110+
76111
# Save to output file
77112
with open(output_filepath, 'wb') as f:
78113
f.write(docx_bytes)

src/core/epub/xhtml_translator.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -888,7 +888,8 @@ async def _translate_all_chunks(
888888
context_manager: Optional[AdaptiveContextManager],
889889
placeholder_format: Tuple[str, str],
890890
log_callback: Optional[Callable] = None,
891-
stats_callback: Optional[Callable] = None
891+
stats_callback: Optional[Callable] = None,
892+
check_interruption_callback: Optional[Callable] = None
892893
) -> Tuple[List[str], TranslationMetrics]:
893894
"""Translate all chunks with fallback.
894895
@@ -901,7 +902,9 @@ async def _translate_all_chunks(
901902
max_retries: Maximum retry attempts per chunk
902903
context_manager: Optional context window manager
903904
placeholder_format: Tuple of (prefix, suffix) for placeholders
904-
log_callback: Optional callback for progress stats_callback: Optional callback for stats updates
905+
log_callback: Optional callback for progress
906+
stats_callback: Optional callback for stats updates
907+
check_interruption_callback: Optional callback to check for interruption
905908
906909
Returns:
907910
Tuple of (translated_chunks, statistics)
@@ -918,6 +921,14 @@ async def _translate_all_chunks(
918921
stats_callback(stats.to_dict())
919922

920923
for i, chunk in enumerate(chunks):
924+
# Check for interruption before processing chunk
925+
if check_interruption_callback:
926+
should_stop = check_interruption_callback()
927+
if should_stop:
928+
if log_callback:
929+
log_callback("translation_interrupted", f"Translation interrupted at chunk {i}/{len(chunks)}")
930+
break
931+
921932
translated = await translate_chunk_with_fallback(
922933
chunk_text=chunk['text'],
923934
local_tag_map=chunk['local_tag_map'],

0 commit comments

Comments
 (0)