Skip to content

Commit 7dfdc0b

Browse files
committed
feat: setup translations using LLM
1 parent fa0947d commit 7dfdc0b

23 files changed

+1139
-861
lines changed

.github/scripts/translate.py

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#!/usr/bin/env python3
2+
3+
import json
4+
import subprocess
5+
import sys
6+
from pathlib import Path
7+
from typing import Dict, Any, Set
8+
9+
10+
def load_json(file_path: Path) -> Dict[str, Any]:
    """Read *file_path* as UTF-8 text and return the decoded JSON document."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
13+
14+
15+
def save_json(file_path: Path, data: Dict[str, Any]) -> None:
    """Write *data* to *file_path* as 2-space-indented UTF-8 JSON.

    Non-ASCII characters are written as-is (not escaped) and the file ends
    with a single trailing newline.
    """
    with open(file_path, 'w', encoding='utf-8') as handle:
        serialized = json.dumps(data, ensure_ascii=False, indent=2)
        handle.write(serialized + '\n')
19+
20+
21+
def _parse_changed_keys(diff_text: str) -> Set[str]:
    """Return the JSON keys whose entry was added or modified in *diff_text*.

    *diff_text* is unified-diff output for a JSON file with one entry per
    line. An added line that also appears among the removed lines once the
    trailing comma is ignored is treated as unchanged: appending a key makes
    the previous last entry gain a comma, and that entry must not be reported.
    """
    removed_entries = set()
    added_entries = []
    for line in diff_text.split('\n'):
        if line.startswith('+') and not line.startswith('+++'):
            added_entries.append(line[1:].strip().rstrip(',').rstrip())
        elif line.startswith('-') and not line.startswith('---'):
            removed_entries.add(line[1:].strip().rstrip(',').rstrip())

    changed_keys = set()
    for entry in added_entries:
        if entry in removed_entries:
            continue  # Only the trailing comma (or the position) changed.
        if not (entry.startswith('"') and '":' in entry):
            continue
        try:
            # Parsing the line as a one-entry object decodes escaped quotes
            # inside the key correctly.
            parsed = json.loads('{' + entry + '}')
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict) and len(parsed) == 1:
            changed_keys.update(parsed)
        else:
            # The value continues on following lines (object/array), so fall
            # back to the text between the first pair of quotes.
            changed_keys.add(entry.split('"')[1])
    return changed_keys


def get_changed_keys(en_file: Path) -> Set[str]:
    """Return the keys added or modified in *en_file* by the latest commit.

    Runs ``git diff HEAD~1 HEAD -- <en_file>`` from the directory two levels
    above the file and parses the added lines. Returns an empty set when the
    diff is empty. Exits the process with status 1 if git cannot be run or
    returns a non-zero exit code.
    """
    print("Getting git diff...", flush=True)

    try:
        result = subprocess.run(
            ['git', 'diff', 'HEAD~1', 'HEAD', '--', str(en_file)],
            capture_output=True,
            text=True,
            # File content in the diff is UTF-8; do not depend on the locale.
            encoding='utf-8',
            check=False,
            cwd=en_file.parent.parent,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        print(f"Exception in get_changed_keys: {e}", flush=True)
        sys.exit(1)

    print(f"Git diff return code: {result.returncode}", flush=True)

    if result.returncode != 0:
        print(f"Git diff error: {result.stderr}", flush=True)
        sys.exit(1)

    if not result.stdout.strip():
        print("No diff found - file unchanged", flush=True)
        return set()

    return _parse_changed_keys(result.stdout)
61+
62+
63+
def _build_translation_prompt(keys_dict: Dict[str, str], target_language: str) -> str:
    """Return the prompt asking the LLM to translate *keys_dict*'s values."""
    return f"""You are a professional translator. Translate the following JSON object from English to {target_language}.

IMPORTANT RULES:
1. Keep all JSON keys EXACTLY the same (do not translate keys)
2. Only translate the VALUES
3. Preserve any special formatting like quotes (\"\"), placeholders (\"M\", \"N\", \"X\", \"ENTRY\", \"PLAYLIST\", etc.)
4. Maintain the same meaning, punctuation, capitalization, structure and formatting
5. Return ONLY the translated JSON object, no additional text
6. Ensure the output is valid JSON
7. Try to keep the same string length as the original string (if possible)

Input JSON:
{json.dumps(keys_dict, ensure_ascii=False, indent=2)}"""


def _extract_json_object(content: str) -> str:
    """Return the outermost ``{...}`` span of *content*, or *content* itself.

    This drops a Markdown code fence (with or without a ``json`` label) and
    any prose the model put before or after the object.
    """
    start = content.find('{')
    end = content.rfind('}')
    if start != -1 and end > start:
        return content[start:end + 1]
    return content


def _merge_translations(parsed: Any, keys_dict: Dict[str, str]) -> Dict[str, str]:
    """Return a dict with exactly *keys_dict*'s keys, using *parsed* where valid.

    A translated value is accepted only if it has the same type as the source
    value; missing or mistyped entries keep the English source, and keys the
    model invented are dropped.
    """
    if not isinstance(parsed, dict):
        print(f"Unexpected LLM output type: {type(parsed).__name__}", flush=True)
        return keys_dict

    merged = {}
    for key, source in keys_dict.items():
        candidate = parsed.get(key)
        if isinstance(candidate, type(source)):
            merged[key] = candidate
        else:
            print(f"Missing or invalid translation for key: {key}", flush=True)
            merged[key] = source
    return merged


def translate_keys(keys_dict: Dict[str, str], target_language: str) -> Dict[str, str]:
    """Translate the values of *keys_dict* from English to *target_language*.

    Pipes a prompt to the ``llm`` CLI (model ``github/gpt-4o``) and parses its
    reply as a JSON object. This is best-effort: if the CLI cannot be run,
    exits non-zero, returns nothing, or returns something that is not valid
    JSON, *keys_dict* is returned unchanged. Individual keys the model omitted
    or returned with the wrong type also keep their English value.

    Returns a dict with the same keys as *keys_dict*.
    """
    if not keys_dict:
        # Nothing to translate; avoid a pointless LLM call.
        return {}

    prompt = _build_translation_prompt(keys_dict, target_language)

    print("Calling LLM...", flush=True)

    try:
        result = subprocess.run(
            ['llm', '-m', 'github/gpt-4o'],
            input=prompt,
            capture_output=True,
            text=True,
            check=False,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        print(f"Exception calling LLM: {e}", flush=True)
        return keys_dict

    print(f"LLM returned with code {result.returncode}", flush=True)

    if result.returncode != 0:
        print(f"Error: {result.stderr}", flush=True)
        return keys_dict

    content = result.stdout.strip()

    if not content:
        print("Empty response from LLM", flush=True)
        return keys_dict

    content = _extract_json_object(content)

    try:
        parsed = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"JSON error: {e}", flush=True)
        print(f"Content: {content[:200]}...", flush=True)
        return keys_dict

    return _merge_translations(parsed, keys_dict)
118+
119+
120+
def main():
    """Translate the keys changed in en_US.json into every other language.

    Steps:
      1. Load ``localizations/en_US.json`` from the project root (two levels
         above this script's directory).
      2. Ask git which keys the latest commit added or modified; exit with
         status 0 if there are none.
      3. For each language listed in ``index.json`` except ``en_US``,
         translate the changed keys in batches, merge them over the existing
         localization and save the result in en_US.json key order.

    Exits with status 1 if en_US.json or index.json is missing.
    """
    print("Starting translation script...", flush=True)

    # Setup paths.
    script_dir = Path(__file__).parent
    project_root = script_dir.parent.parent
    localizations_dir = project_root / "localizations"
    index_file = project_root / "index.json"
    en_file = localizations_dir / "en_US.json"

    print("Paths:", flush=True)
    print(f"  project_root: {project_root}", flush=True)
    print(f"  en_file: {en_file}", flush=True)

    if not en_file.exists():
        print(f"Error: {en_file} not found", flush=True)
        sys.exit(1)

    # Load English localization file.
    en_data = load_json(en_file)
    print(f"Loaded {len(en_data)} keys from en_US.json", flush=True)

    # Get keys that were changed in the latest commit.
    changed_keys = get_changed_keys(en_file)

    if not changed_keys:
        print("No changed keys found - nothing to translate", flush=True)
        sys.exit(0)

    print(f"Found {len(changed_keys)} changed keys: {', '.join(sorted(changed_keys))}", flush=True)

    # Load list of available languages from index.json.
    if not index_file.exists():
        print(f"Error: {index_file} not found", flush=True)
        sys.exit(1)

    languages = load_json(index_file)
    print(f"Loaded {len(languages)} languages", flush=True)

    # The work list is the same for every language, so build it once.
    # Iterating en_data (not the set) keeps batch contents stable across runs.
    keys_to_translate = {k: v for k, v in en_data.items() if k in changed_keys}

    # Translate in batches to avoid overwhelming the LLM.
    batch_size = 50
    keys = list(keys_to_translate)
    batches = [
        {k: keys_to_translate[k] for k in keys[start:start + batch_size]}
        for start in range(0, len(keys), batch_size)
    ]
    total_batches = len(batches)

    # Translate changed keys for each language.
    for lang_info in languages:
        lang_code = lang_info['code']
        lang_name = lang_info['name']

        # Skip English since it's the source language.
        if lang_code == 'en_US':
            continue

        print(f"\n[{lang_code}] {lang_name}", flush=True)

        target_file = localizations_dir / f"{lang_code}.json"
        existing_data = load_json(target_file) if target_file.exists() else {}

        if not keys_to_translate:
            print("Up to date", flush=True)
            continue

        print(f"Translating {len(keys_to_translate)} keys...", flush=True)

        translated = {}
        for batch_num, batch_dict in enumerate(batches, start=1):
            print(f"Batch {batch_num}/{total_batches}", flush=True)
            translated.update(translate_keys(batch_dict, lang_name))

        # Merge translations with existing data and maintain key order from
        # en_US.json. Keys missing everywhere fall back to the English text;
        # keys no longer present in en_US.json are dropped.
        final_data = {**existing_data, **translated}
        ordered_data = {k: final_data.get(k, en_data[k]) for k in en_data}

        save_json(target_file, ordered_data)
        print("✓ Saved", flush=True)

    print("\n✓ Done", flush=True)


if __name__ == "__main__":
    main()

.github/workflows/translate.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Runs .github/scripts/translate.py whenever localizations/en_US.json changes
# on the default branch and opens a pull request with the resulting files.
name: Translate

on:
  push:
    branches: ["main", "master"]
    paths:
      - "localizations/en_US.json"

jobs:
  translate:
    # Only run for pushes whose pusher name and email match the maintainer.
    if: github.event.pusher.name == 'alexmercerind' && github.event.pusher.email == 'saini123hitesh@gmail.com'
    runs-on: ubuntu-latest
    # NOTE(review): GitHub Models may additionally require `models: read` for
    # GITHUB_TOKEN-based inference - confirm against current GitHub docs.
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # The script diffs HEAD~1..HEAD, so the parent commit must be present.
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install llm-github-models CLI
        run: |
          pip install llm-github-models
      - name: Configure llm
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          llm keys set github --value "$GITHUB_TOKEN"
      - name: Generate translations
        run: |
          python3 .github/scripts/translate.py
      - name: Check for changes
        id: check_changes
        run: |
          # `git status --porcelain` also reports untracked files, so a newly
          # created localization file is detected (`git diff --quiet` only
          # looks at tracked files).
          if [ -z "$(git status --porcelain)" ]; then
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
          else
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Create Pull Request
        if: steps.check_changes.outputs.has_changes == 'true'
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: "chore: auto-translate localizations"
          title: "Auto-Translate Localizations"
          body: "This pull request contains automatically generated translations based on changes to `en_US.json`."
          branch: auto-translate-${{ github.run_number }}
          delete-branch: true
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: CI
1+
name: Validate
22

33
on:
44
push:
@@ -14,4 +14,4 @@ jobs:
1414
- name: Validate JSON syntax
1515
run: for file in `ls | grep '.json' `; do python -mjson.tool "$file" > /dev/null ; done
1616
- name: Validate index entries & localization values
17-
run: python3 .github/ci.py
17+
run: python3 .github/scripts/validate.py

0 commit comments

Comments
 (0)