2323from __future__ import annotations
2424
2525import asyncio
26+ import hashlib
2627import json
2728import os
2829import random
2930import re
3031import secrets
3132import subprocess
32- import sys
3333import time
34- from dataclasses import dataclass
34+ from dataclasses import asdict , dataclass
35+ from datetime import datetime
3536from functools import lru_cache
3637from pathlib import Path
3738from typing import NamedTuple
@@ -559,35 +560,52 @@ async def _call_claude_once_async(
559560 raise RuntimeError ("Unreachable" )
560561
561562
@dataclass
class TranslationResult:
    """Translated text together with the API metadata collected for it.

    Built by ``call_claude_async`` once the response is complete, i.e. after
    the initial request and any follow-up continuation requests.
    """

    # Full translated text: every response part joined, then stripped.
    text: str
    # Model name as reported on the last API response.
    model: str
    # Input tokens summed over the initial request and all continuations.
    input_tokens: int
    # Output tokens summed over the initial request and all continuations.
    output_tokens: int
    # Stop reason reported on the last API response.
    stop_reason: str
    # Number of continuation requests that were issued (0 if none).
    continuations: int
573+
574+
562575async def call_claude_async (
563576 prompt : str , file_label : str = "" , client : anthropic .AsyncAnthropic | None = None
564- ) -> str :
577+ ) -> TranslationResult :
565578 """Async version of call_claude with continuation support.
566579
567580 Args:
568581 prompt: The translation prompt
569582 file_label: Optional label for log messages
570583 client: Optional shared client (creates one if not provided)
584+
585+ Returns:
586+ TranslationResult with text and API metadata
571587 """
572588 client = client or anthropic .AsyncAnthropic ()
573589 messages : list [dict ] = [{"role" : "user" , "content" : prompt }]
574590
575591 # Initial request
576592 message = await _call_claude_once_async (client , messages , file_label )
577593 result_parts = [message .content [0 ].text ]
594+ total_input_tokens = message .usage .input_tokens
595+ total_output_tokens = message .usage .output_tokens
578596
579597 # Handle continuations if response was truncated
580- continuation = 0
598+ continuations = 0
581599 while message .stop_reason == "max_tokens" :
582- continuation += 1
583- if continuation > MAX_CONTINUATIONS :
600+ continuations += 1
601+ if continuations > MAX_CONTINUATIONS :
584602 raise TranslationError (
585603 f"Response still incomplete after { MAX_CONTINUATIONS } continuations. "
586604 f"File may be too large to translate."
587605 )
588606 print (
589607 f" [{ file_label } ] Response truncated, continuation "
590- f"{ continuation } /{ MAX_CONTINUATIONS } ..."
608+ f"{ continuations } /{ MAX_CONTINUATIONS } ..."
591609 )
592610
593611 messages .append ({"role" : "assistant" , "content" : message .content [0 ].text })
@@ -597,8 +615,17 @@ async def call_claude_async(
597615
598616 message = await _call_claude_once_async (client , messages , file_label )
599617 result_parts .append (message .content [0 ].text )
600-
601- return "" .join (result_parts ).strip ()
618+ total_input_tokens += message .usage .input_tokens
619+ total_output_tokens += message .usage .output_tokens
620+
621+ return TranslationResult (
622+ text = "" .join (result_parts ).strip (),
623+ model = message .model ,
624+ input_tokens = total_input_tokens ,
625+ output_tokens = total_output_tokens ,
626+ stop_reason = message .stop_reason ,
627+ continuations = continuations ,
628+ )
602629
603630
604631def translate_file (tf : TranslationFile , console : Console ) -> None :
@@ -622,6 +649,93 @@ def translate_file(tf: TranslationFile, console: Console) -> None:
622649 tf .lang_path .write_text (f"{ result } \n " , encoding = "utf-8" , newline = "\n " )
623650
624651
@dataclass
class FileLogEntry:
    """Record of one file's translation attempt, as stored in the JSON log.

    Only ``filename`` and ``started_at`` are required; every other field has
    a neutral default and is filled in as the translation progresses, so an
    entry for a failed file simply keeps the defaults for whatever was never
    reached.
    """

    # Path of the file relative to the docs tree, as a string.
    filename: str
    # ISO-8601 timestamps for the start / end of processing this file.
    started_at: str
    finished_at: str = ""
    # Wall-clock seconds between start and finish.
    duration_s: float = 0.0
    # Line count and short content hash of the English source.
    input_lines: int = 0
    input_hash: str = ""
    # Line count and short content hash of the written translation.
    output_lines: int = 0
    output_hash: str = ""
    # True when the written translation differs from what was there before.
    changed: bool = False
    # Stringified exception if the translation failed, otherwise empty.
    error: str = ""

    # --- API response metadata ---
    model: str = ""
    input_tokens: int = 0
    output_tokens: int = 0
    stop_reason: str = ""
    continuations: int = 0
672+
673+
674+ class TranslationLog :
675+ """Verbose log for debugging translation runs."""
676+
677+ def __init__ (
678+ self ,
679+ log_path : Path ,
680+ baseline : str ,
681+ language : str ,
682+ parallel : int ,
683+ total_files : int ,
684+ ):
685+ self .path = log_path
686+ self .baseline = baseline
687+ self .language = language
688+ self .parallel = parallel
689+ self .total_files = total_files
690+ self .entries : list [FileLogEntry ] = []
691+ self ._lock = asyncio .Lock ()
692+ self .started_at = datetime .now ().isoformat ()
693+
694+ async def add_entry (self , entry : FileLogEntry ):
695+ async with self ._lock :
696+ self .entries .append (entry )
697+
698+ def write (self ):
699+ if not self .path :
700+ return
701+ data = {
702+ "started_at" : self .started_at ,
703+ "finished_at" : datetime .now ().isoformat (),
704+ "baseline" : self .baseline ,
705+ "language" : self .language ,
706+ "parallel" : self .parallel ,
707+ "total_files" : self .total_files ,
708+ "files_changed" : sum (1 for e in self .entries if e .changed ),
709+ "files_unchanged" : sum (
710+ 1 for e in self .entries if not e .changed and not e .error
711+ ),
712+ "files_failed" : sum (1 for e in self .entries if e .error ),
713+ "total_input_tokens" : sum (e .input_tokens for e in self .entries ),
714+ "total_output_tokens" : sum (e .output_tokens for e in self .entries ),
715+ "files" : [
716+ {
717+ "filename" : e .filename ,
718+ "started_at" : e .started_at ,
719+ "finished_at" : e .finished_at ,
720+ "duration_s" : round (e .duration_s , 2 ),
721+ "input_lines" : e .input_lines ,
722+ "input_hash" : e .input_hash ,
723+ "output_lines" : e .output_lines ,
724+ "output_hash" : e .output_hash ,
725+ "changed" : e .changed ,
726+ "error" : e .error ,
727+ "model" : e .model ,
728+ "input_tokens" : e .input_tokens ,
729+ "output_tokens" : e .output_tokens ,
730+ "stop_reason" : e .stop_reason ,
731+ "continuations" : e .continuations ,
732+ }
733+ for e in sorted (self .entries , key = lambda x : x .started_at )
734+ ],
735+ }
736+ self .path .write_text (json .dumps (data , indent = 2 ), encoding = "utf-8" )
737+
738+
625739class TranslationProgress :
626740 """Track translation progress across parallel tasks."""
627741
@@ -690,28 +804,56 @@ async def translate_file_async(
690804 semaphore : asyncio .Semaphore ,
691805 progress : TranslationProgress ,
692806 client : anthropic .AsyncAnthropic ,
807+ log : TranslationLog | None = None ,
693808) -> None :
694809 """Translate a single file (async version for parallel execution)."""
695810 filename = str (tf .relative_path )
811+ entry = FileLogEntry (filename = filename , started_at = datetime .now ().isoformat ())
812+ start_time = time .time ()
813+
696814 async with semaphore :
697815 await progress .start_one (filename )
698816 try :
699817 langs = get_languages ()
700818 en_content = tf .en_path .read_text (encoding = "utf-8" )
701819 existing = tf .lang_path .read_text (encoding = "utf-8" ) if tf .exists else None
702820
821+ entry .input_lines = en_content .count ("\n " ) + 1
822+ entry .input_hash = hashlib .md5 (en_content .encode ()).hexdigest ()[:12 ]
823+
703824 prompt = build_translation_prompt (
704825 tf .language , langs [tf .language ], en_content , existing
705826 )
706827 result = await call_claude_async (prompt , filename , client )
828+ output_content = f"{ result .text } \n "
829+
830+ # Log API response metadata
831+ entry .model = result .model
832+ entry .input_tokens = result .input_tokens
833+ entry .output_tokens = result .output_tokens
834+ entry .stop_reason = result .stop_reason
835+ entry .continuations = result .continuations
707836
708837 tf .lang_path .parent .mkdir (parents = True , exist_ok = True )
709- tf .lang_path .write_text (f" { result } \n " , encoding = "utf-8" , newline = "\n " )
838+ tf .lang_path .write_text (output_content , encoding = "utf-8" , newline = "\n " )
710839 post_process_file (tf .lang_path , tf .language )
840+
841+ # Compute hash and changed flag AFTER post-processing
842+ final_content = tf .lang_path .read_text (encoding = "utf-8" )
843+ entry .output_lines = final_content .count ("\n " )
844+ entry .output_hash = hashlib .md5 (final_content .encode ()).hexdigest ()[:12 ]
845+ entry .changed = existing is None or existing != final_content
846+
711847 await progress .finish_one (filename , success = True )
712- except Exception :
848+ except Exception as e :
849+ entry .error = str (e )
713850 await progress .finish_one (filename , success = False )
714851 raise
852+ finally :
853+ entry .finished_at = datetime .now ().isoformat ()
854+ entry .duration_s = time .time () - start_time
855+ if log :
856+ await log .add_entry (entry )
715857
716858
717859# =============================================================================
@@ -1015,7 +1157,9 @@ def translate(
10151157DEFAULT_PARALLEL = 50
10161158
10171159
1018- async def _translate_all (files : list [TranslationFile ], parallel : int ) -> None :
1160+ async def _translate_all (
1161+ files : list [TranslationFile ], parallel : int , log : TranslationLog | None = None
1162+ ) -> None :
10191163 """Translate all files in parallel with progress logging."""
10201164 # Pre-compute line counts once (used for sorting and progress tracking)
10211165 file_lines = {
@@ -1030,7 +1174,7 @@ async def _translate_all(files: list[TranslationFile], parallel: int) -> None:
10301174 client = anthropic .AsyncAnthropic ()
10311175
10321176 # Create translation tasks
1033- tasks = [translate_file_async (tf , semaphore , progress , client ) for tf in files ]
1177+ tasks = [translate_file_async (tf , semaphore , progress , client , log ) for tf in files ]
10341178
10351179 # Run translations with progress logger
10361180 logger_task = asyncio .create_task (_progress_logger (progress ))
@@ -1043,6 +1187,11 @@ async def _translate_all(files: list[TranslationFile], parallel: int) -> None:
10431187 except asyncio .CancelledError :
10441188 pass
10451189
1190+ # Write log if enabled
1191+ if log :
1192+ log .write ()
1193+ console .print (f"[dim]Log written to: { log .path } [/dim]" )
1194+
10461195 # Print final status
10471196 console .print (f"[bold green]Translation complete:[/bold green] { progress .status ()} " )
10481197
@@ -1070,6 +1219,11 @@ def sync(
10701219 "-p" ,
10711220 help = "Max concurrent translations (default: 50)" ,
10721221 ),
1222+ log_file : Path | None = typer .Option (
1223+ None ,
1224+ "--log" ,
1225+ help = "Write detailed JSON log to file" ,
1226+ ),
10731227):
10741228 """Sync translations: update outdated, add missing, remove orphaned."""
10751229 console = Console (force_terminal = True if os .getenv ("GITHUB_ACTIONS" ) else None )
@@ -1138,7 +1292,13 @@ def sync(
11381292 print (
11391293 f"Translating { len (all_files )} files with parallel={ parallel } . Largest files first."
11401294 )
1141- asyncio .run (_translate_all (all_files , parallel ))
1295+ # Set up verbose log if requested
1296+ log = (
1297+ TranslationLog (log_file , baseline , lang , parallel , len (all_files ))
1298+ if log_file
1299+ else None
1300+ )
1301+ asyncio .run (_translate_all (all_files , parallel , log ))
11421302
11431303 console .print ("[green]✓ Sync complete[/green]" )
11441304
0 commit comments