Skip to content

Commit 8fa99f8

Browse files
authored
Add verbose JSON logging for translation runs (#853)
- New --log flag to save detailed per-file translation logs
- Logs include: timing, line counts, content hashes, changed flag
- Logs include API metadata: model, tokens, stop_reason, continuations
- Workflow uploads logs as artifacts (7-day retention)
- Helps debug cases where files appear unchanged unexpectedly
1 parent 8d0da5b commit 8fa99f8

File tree

2 files changed

+184
-15
lines changed

2 files changed

+184
-15
lines changed

.github/workflows/translate.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ jobs:
7575
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
7676
GITHUB_REPOSITORY: ${{ github.repository }}
7777
working-directory: _scripts
78-
run: uv run -q translate.py sync ${{ matrix.language }}
78+
run: uv run -q translate.py sync ${{ matrix.language }} --log translate-${{ matrix.language }}.json
7979

8080
- name: Dry run ${{ matrix.language }}
8181
if: ${{ inputs.dry_run }}
@@ -94,6 +94,15 @@ jobs:
9494
retention-days: 1
9595
if-no-files-found: ignore
9696

97+
- name: Upload translation log
98+
if: ${{ !inputs.dry_run && always() }}
99+
uses: actions/upload-artifact@v4
100+
with:
101+
name: log-${{ matrix.language }}
102+
path: _scripts/translate-${{ matrix.language }}.json
103+
retention-days: 7
104+
if-no-files-found: ignore
105+
97106
merge:
98107
needs: [detect, translate]
99108
if: always() && needs.detect.outputs.has_work == 'true' && !inputs.dry_run && !cancelled()

_scripts/translate.py

Lines changed: 174 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,16 @@
2323
from __future__ import annotations
2424

2525
import asyncio
26+
import hashlib
2627
import json
2728
import os
2829
import random
2930
import re
3031
import secrets
3132
import subprocess
32-
import sys
3333
import time
34-
from dataclasses import dataclass
34+
from dataclasses import asdict, dataclass
35+
from datetime import datetime
3536
from functools import lru_cache
3637
from pathlib import Path
3738
from typing import NamedTuple
@@ -559,35 +560,52 @@ async def _call_claude_once_async(
559560
raise RuntimeError("Unreachable")
560561

561562

563+
@dataclass
class TranslationResult:
    """Translated text plus the API metadata gathered while producing it."""

    # Final translated text.
    text: str

    # Model identifier and stop reason reported by the API response.
    model: str

    # Token usage reported by the API.
    input_tokens: int
    output_tokens: int

    stop_reason: str

    # Number of follow-up requests issued because a response was truncated.
    continuations: int
573+
574+
562575
async def call_claude_async(
563576
prompt: str, file_label: str = "", client: anthropic.AsyncAnthropic | None = None
564-
) -> str:
577+
) -> TranslationResult:
565578
"""Async version of call_claude with continuation support.
566579
567580
Args:
568581
prompt: The translation prompt
569582
file_label: Optional label for log messages
570583
client: Optional shared client (creates one if not provided)
584+
585+
Returns:
586+
TranslationResult with text and API metadata
571587
"""
572588
client = client or anthropic.AsyncAnthropic()
573589
messages: list[dict] = [{"role": "user", "content": prompt}]
574590

575591
# Initial request
576592
message = await _call_claude_once_async(client, messages, file_label)
577593
result_parts = [message.content[0].text]
594+
total_input_tokens = message.usage.input_tokens
595+
total_output_tokens = message.usage.output_tokens
578596

579597
# Handle continuations if response was truncated
580-
continuation = 0
598+
continuations = 0
581599
while message.stop_reason == "max_tokens":
582-
continuation += 1
583-
if continuation > MAX_CONTINUATIONS:
600+
continuations += 1
601+
if continuations > MAX_CONTINUATIONS:
584602
raise TranslationError(
585603
f"Response still incomplete after {MAX_CONTINUATIONS} continuations. "
586604
f"File may be too large to translate."
587605
)
588606
print(
589607
f" [{file_label}] Response truncated, continuation "
590-
f"{continuation}/{MAX_CONTINUATIONS}..."
608+
f"{continuations}/{MAX_CONTINUATIONS}..."
591609
)
592610

593611
messages.append({"role": "assistant", "content": message.content[0].text})
@@ -597,8 +615,17 @@ async def call_claude_async(
597615

598616
message = await _call_claude_once_async(client, messages, file_label)
599617
result_parts.append(message.content[0].text)
600-
601-
return "".join(result_parts).strip()
618+
total_input_tokens += message.usage.input_tokens
619+
total_output_tokens += message.usage.output_tokens
620+
621+
return TranslationResult(
622+
text="".join(result_parts).strip(),
623+
model=message.model,
624+
input_tokens=total_input_tokens,
625+
output_tokens=total_output_tokens,
626+
stop_reason=message.stop_reason,
627+
continuations=continuations,
628+
)
602629

603630

604631
def translate_file(tf: TranslationFile, console: Console) -> None:
@@ -622,6 +649,93 @@ def translate_file(tf: TranslationFile, console: Console) -> None:
622649
tf.lang_path.write_text(f"{result}\n", encoding="utf-8", newline="\n")
623650

624651

652+
@dataclass
class FileLogEntry:
    """Per-file record written to the verbose translation log.

    Only ``filename`` and ``started_at`` are required; every other field
    starts at an empty/zero default and is filled in by the caller as the
    translation progresses.
    """

    filename: str
    started_at: str

    # Timing
    finished_at: str = ""
    duration_s: float = 0.0

    # Source content statistics
    input_lines: int = 0
    input_hash: str = ""

    # Written content statistics
    output_lines: int = 0
    output_hash: str = ""

    # Outcome; a non-empty ``error`` marks the file as failed
    changed: bool = False
    error: str = ""

    # API response metadata
    model: str = ""
    input_tokens: int = 0
    output_tokens: int = 0
    stop_reason: str = ""
    continuations: int = 0
672+
673+
674+
class TranslationLog:
675+
"""Verbose log for debugging translation runs."""
676+
677+
def __init__(
678+
self,
679+
log_path: Path,
680+
baseline: str,
681+
language: str,
682+
parallel: int,
683+
total_files: int,
684+
):
685+
self.path = log_path
686+
self.baseline = baseline
687+
self.language = language
688+
self.parallel = parallel
689+
self.total_files = total_files
690+
self.entries: list[FileLogEntry] = []
691+
self._lock = asyncio.Lock()
692+
self.started_at = datetime.now().isoformat()
693+
694+
async def add_entry(self, entry: FileLogEntry):
695+
async with self._lock:
696+
self.entries.append(entry)
697+
698+
def write(self):
699+
if not self.path:
700+
return
701+
data = {
702+
"started_at": self.started_at,
703+
"finished_at": datetime.now().isoformat(),
704+
"baseline": self.baseline,
705+
"language": self.language,
706+
"parallel": self.parallel,
707+
"total_files": self.total_files,
708+
"files_changed": sum(1 for e in self.entries if e.changed),
709+
"files_unchanged": sum(
710+
1 for e in self.entries if not e.changed and not e.error
711+
),
712+
"files_failed": sum(1 for e in self.entries if e.error),
713+
"total_input_tokens": sum(e.input_tokens for e in self.entries),
714+
"total_output_tokens": sum(e.output_tokens for e in self.entries),
715+
"files": [
716+
{
717+
"filename": e.filename,
718+
"started_at": e.started_at,
719+
"finished_at": e.finished_at,
720+
"duration_s": round(e.duration_s, 2),
721+
"input_lines": e.input_lines,
722+
"input_hash": e.input_hash,
723+
"output_lines": e.output_lines,
724+
"output_hash": e.output_hash,
725+
"changed": e.changed,
726+
"error": e.error,
727+
"model": e.model,
728+
"input_tokens": e.input_tokens,
729+
"output_tokens": e.output_tokens,
730+
"stop_reason": e.stop_reason,
731+
"continuations": e.continuations,
732+
}
733+
for e in sorted(self.entries, key=lambda x: x.started_at)
734+
],
735+
}
736+
self.path.write_text(json.dumps(data, indent=2), encoding="utf-8")
737+
738+
625739
class TranslationProgress:
626740
"""Track translation progress across parallel tasks."""
627741

@@ -690,28 +804,56 @@ async def translate_file_async(
690804
semaphore: asyncio.Semaphore,
691805
progress: TranslationProgress,
692806
client: anthropic.AsyncAnthropic,
807+
log: TranslationLog | None = None,
693808
) -> None:
694809
"""Translate a single file (async version for parallel execution)."""
695810
filename = str(tf.relative_path)
811+
entry = FileLogEntry(filename=filename, started_at=datetime.now().isoformat())
812+
start_time = time.time()
813+
696814
async with semaphore:
697815
await progress.start_one(filename)
698816
try:
699817
langs = get_languages()
700818
en_content = tf.en_path.read_text(encoding="utf-8")
701819
existing = tf.lang_path.read_text(encoding="utf-8") if tf.exists else None
702820

821+
entry.input_lines = en_content.count("\n") + 1
822+
entry.input_hash = hashlib.md5(en_content.encode()).hexdigest()[:12]
823+
703824
prompt = build_translation_prompt(
704825
tf.language, langs[tf.language], en_content, existing
705826
)
706827
result = await call_claude_async(prompt, filename, client)
828+
output_content = f"{result.text}\n"
829+
830+
# Log API response metadata
831+
entry.model = result.model
832+
entry.input_tokens = result.input_tokens
833+
entry.output_tokens = result.output_tokens
834+
entry.stop_reason = result.stop_reason
835+
entry.continuations = result.continuations
707836

708837
tf.lang_path.parent.mkdir(parents=True, exist_ok=True)
709-
tf.lang_path.write_text(f"{result}\n", encoding="utf-8", newline="\n")
838+
tf.lang_path.write_text(output_content, encoding="utf-8", newline="\n")
710839
post_process_file(tf.lang_path, tf.language)
840+
841+
# Compute hash and changed flag AFTER post-processing
842+
final_content = tf.lang_path.read_text(encoding="utf-8")
843+
entry.output_lines = final_content.count("\n")
844+
entry.output_hash = hashlib.md5(final_content.encode()).hexdigest()[:12]
845+
entry.changed = existing is None or existing != final_content
846+
711847
await progress.finish_one(filename, success=True)
712-
except Exception:
848+
except Exception as e:
849+
entry.error = str(e)
713850
await progress.finish_one(filename, success=False)
714851
raise
852+
finally:
853+
entry.finished_at = datetime.now().isoformat()
854+
entry.duration_s = time.time() - start_time
855+
if log:
856+
await log.add_entry(entry)
715857

716858

717859
# =============================================================================
@@ -1015,7 +1157,9 @@ def translate(
10151157
DEFAULT_PARALLEL = 50
10161158

10171159

1018-
async def _translate_all(files: list[TranslationFile], parallel: int) -> None:
1160+
async def _translate_all(
1161+
files: list[TranslationFile], parallel: int, log: TranslationLog | None = None
1162+
) -> None:
10191163
"""Translate all files in parallel with progress logging."""
10201164
# Pre-compute line counts once (used for sorting and progress tracking)
10211165
file_lines = {
@@ -1030,7 +1174,7 @@ async def _translate_all(files: list[TranslationFile], parallel: int) -> None:
10301174
client = anthropic.AsyncAnthropic()
10311175

10321176
# Create translation tasks
1033-
tasks = [translate_file_async(tf, semaphore, progress, client) for tf in files]
1177+
tasks = [translate_file_async(tf, semaphore, progress, client, log) for tf in files]
10341178

10351179
# Run translations with progress logger
10361180
logger_task = asyncio.create_task(_progress_logger(progress))
@@ -1043,6 +1187,11 @@ async def _translate_all(files: list[TranslationFile], parallel: int) -> None:
10431187
except asyncio.CancelledError:
10441188
pass
10451189

1190+
# Write log if enabled
1191+
if log:
1192+
log.write()
1193+
console.print(f"[dim]Log written to: {log.path}[/dim]")
1194+
10461195
# Print final status
10471196
console.print(f"[bold green]Translation complete:[/bold green] {progress.status()}")
10481197

@@ -1070,6 +1219,11 @@ def sync(
10701219
"-p",
10711220
help="Max concurrent translations (default: 50)",
10721221
),
1222+
log_file: Path | None = typer.Option(
1223+
None,
1224+
"--log",
1225+
help="Write detailed JSON log to file",
1226+
),
10731227
):
10741228
"""Sync translations: update outdated, add missing, remove orphaned."""
10751229
console = Console(force_terminal=True if os.getenv("GITHUB_ACTIONS") else None)
@@ -1138,7 +1292,13 @@ def sync(
11381292
print(
11391293
f"Translating {len(all_files)} files with parallel={parallel}. Largest files first."
11401294
)
1141-
asyncio.run(_translate_all(all_files, parallel))
1295+
# Set up verbose log if requested
1296+
log = (
1297+
TranslationLog(log_file, baseline, lang, parallel, len(all_files))
1298+
if log_file
1299+
else None
1300+
)
1301+
asyncio.run(_translate_all(all_files, parallel, log))
11421302

11431303
console.print("[green]✓ Sync complete[/green]")
11441304

0 commit comments

Comments
 (0)