Skip to content

Commit b16de18

Browse files
committed
skip tok.json from auto translations
1 parent: cee752a · commit: b16de18

File tree

1 file changed: 41 additions and 35 deletions

1 file changed: 41 additions and 35 deletions

.github/scripts/translate.py

Lines changed: 41 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
BATCH_SIZE = 50
1111
LLM_MODEL = 'github/gpt-4o'
1212
SOURCE_LANGUAGE = 'en_US'
13+
SKIP_LANGUAGES = {'tok'}
1314

1415

1516
def load_json(file_path: Path) -> Dict[str, Any]:
@@ -18,7 +19,7 @@ def load_json(file_path: Path) -> Dict[str, Any]:
1819
with open(file_path, 'r', encoding='utf-8') as f:
1920
return json.load(f)
2021
except (FileNotFoundError, json.JSONDecodeError) as e:
21-
print(f"Error loading {file_path}: {e}", flush=True)
22+
print(f"Error loading {file_path}: {e}")
2223
raise
2324

2425

@@ -30,13 +31,13 @@ def save_json(file_path: Path, data: Dict[str, Any]) -> None:
3031
json.dump(data, f, ensure_ascii=False, indent=2)
3132
f.write('\n')
3233
except Exception as e:
33-
print(f"Error saving {file_path}: {e}", flush=True)
34+
print(f"Error saving {file_path}: {e}")
3435
raise
3536

3637

3738
def get_changed_keys(en_file: Path) -> Set[str]:
3839
"""Extract changed keys from git diff of the English localization file."""
39-
print("Getting git diff...", flush=True)
40+
print("Getting git diff...")
4041

4142
try:
4243
result = subprocess.run(
@@ -47,14 +48,14 @@ def get_changed_keys(en_file: Path) -> Set[str]:
4748
cwd=en_file.parent.parent
4849
)
4950

50-
print(f"Git diff return code: {result.returncode}", flush=True)
51+
print(f"Git diff return code: {result.returncode}")
5152

5253
if result.returncode != 0:
53-
print(f"Git diff error: {result.stderr}", flush=True)
54+
print(f"Git diff error: {result.stderr}")
5455
sys.exit(1)
5556

5657
if not result.stdout.strip():
57-
print("No diff found - file unchanged", flush=True)
58+
print("No diff found - file unchanged")
5859
return set()
5960

6061
# Parse diff output to extract changed keys using regex for better accuracy
@@ -69,10 +70,10 @@ def get_changed_keys(en_file: Path) -> Set[str]:
6970
return changed_keys
7071

7172
except subprocess.TimeoutExpired:
72-
print("Git diff timed out", flush=True)
73+
print("Git diff timed out")
7374
sys.exit(1)
7475
except Exception as e:
75-
print(f"Exception in get_changed_keys: {e}", flush=True)
76+
print(f"Exception in get_changed_keys: {e}")
7677
sys.exit(1)
7778

7879

@@ -109,17 +110,17 @@ def call_llm(prompt: str) -> Optional[str]:
109110
stdout, stderr = process.communicate(input=prompt, timeout=300)
110111

111112
if process.returncode != 0:
112-
print(f"LLM error: {stderr}", flush=True)
113+
print(f"LLM error: {stderr}")
113114
return None
114115

115116
return stdout.strip() if stdout.strip() else None
116117

117118
except subprocess.TimeoutExpired:
118-
print("LLM call timed out", flush=True)
119+
print("LLM call timed out")
119120
process.kill()
120121
return None
121122
except Exception as e:
122-
print(f"Exception calling LLM: {e}", flush=True)
123+
print(f"Exception calling LLM: {e}")
123124
return None
124125

125126

@@ -166,16 +167,16 @@ def translate_keys(
166167
if not keys_dict:
167168
return {}
168169

169-
print(f"Calling LLM...", flush=True)
170+
print(f"Calling LLM...")
170171

171172
prompt = build_translation_prompt(keys_dict, target_language, full_en_data, existing_target_data)
172173
response = call_llm(prompt)
173174

174175
if not response:
175-
print("Empty or failed LLM response, returning original keys", flush=True)
176+
print("Empty or failed LLM response, returning original keys")
176177
return keys_dict
177178

178-
print(f"LLM returned successfully", flush=True)
179+
print(f"LLM returned successfully")
179180

180181
# Strip markdown formatting
181182
content = strip_markdown_code_block(response)
@@ -186,21 +187,21 @@ def translate_keys(
186187

187188
# Validate that all keys are present
188189
if not isinstance(translated, dict):
189-
print("LLM response is not a dictionary", flush=True)
190+
print("LLM response is not a dictionary")
190191
return keys_dict
191192

192193
missing_keys = set(keys_dict.keys()) - set(translated.keys())
193194
if missing_keys:
194-
print(f"Warning: Missing keys in translation: {missing_keys}", flush=True)
195+
print(f"Warning: Missing keys in translation: {missing_keys}")
195196
# Fill in missing keys with original values
196197
for key in missing_keys:
197198
translated[key] = keys_dict[key]
198199

199200
return translated
200201

201202
except json.JSONDecodeError as e:
202-
print(f"JSON decode error: {e}", flush=True)
203-
print(f"Content preview: {content[:500]}...", flush=True)
203+
print(f"JSON decode error: {e}")
204+
print(f"Content preview: {content[:500]}...")
204205
return keys_dict
205206

206207

@@ -214,10 +215,10 @@ def translate_language(
214215
) -> bool:
215216
"""Translate all keys for a specific language."""
216217
if not keys_to_translate:
217-
print("Up to date", flush=True)
218+
print("Up to date")
218219
return False
219220

220-
print(f"Translating {len(keys_to_translate)} keys...", flush=True)
221+
print(f"Translating {len(keys_to_translate)} keys...")
221222

222223
# Translate in batches
223224
translated = {}
@@ -229,7 +230,7 @@ def translate_language(
229230
batch_dict = {k: keys_to_translate[k] for k in batch_keys}
230231

231232
batch_num = i // BATCH_SIZE + 1
232-
print(f"Batch {batch_num}/{total_batches} ({len(batch_keys)} keys)", flush=True)
233+
print(f"Batch {batch_num}/{total_batches} ({len(batch_keys)} keys)")
233234

234235
batch_translated = translate_keys(batch_dict, lang_name, en_data, existing_data)
235236
translated.update(batch_translated)
@@ -241,14 +242,14 @@ def translate_language(
241242
# Save the updated translations
242243
target_file = localizations_dir / f"{lang_code}.json"
243244
save_json(target_file, ordered_data)
244-
print(f"✓ Saved to {target_file.name}", flush=True)
245+
print(f"✓ Saved to {target_file.name}")
245246

246247
return True
247248

248249

249250
def main() -> None:
250251
"""Main entry point for the translation script."""
251-
print("Starting translation script...", flush=True)
252+
print("Starting translation script...")
252253

253254
# Setup paths
254255
script_dir = Path(__file__).parent
@@ -257,39 +258,39 @@ def main() -> None:
257258
index_file = project_root / "index.json"
258259
en_file = localizations_dir / f"{SOURCE_LANGUAGE}.json"
259260

260-
print(f"Paths:", flush=True)
261-
print(f" project_root: {project_root}", flush=True)
262-
print(f" en_file: {en_file}", flush=True)
261+
print(f"Paths:")
262+
print(f" project_root: {project_root}")
263+
print(f" en_file: {en_file}")
263264

264265
# Validate English localization file exists
265266
if not en_file.exists():
266-
print(f"Error: {en_file} not found", flush=True)
267+
print(f"Error: {en_file} not found")
267268
sys.exit(1)
268269

269270
# Load English localization file
270271
try:
271272
en_data = load_json(en_file)
272-
print(f"Loaded {len(en_data)} keys from {SOURCE_LANGUAGE}.json", flush=True)
273+
print(f"Loaded {len(en_data)} keys from {SOURCE_LANGUAGE}.json")
273274
except Exception:
274275
sys.exit(1)
275276

276277
# Get keys that were changed in the latest commit
277278
changed_keys = get_changed_keys(en_file)
278279

279280
if not changed_keys:
280-
print("No changed keys found - nothing to translate", flush=True)
281+
print("No changed keys found - nothing to translate")
281282
sys.exit(0)
282283

283-
print(f"Found {len(changed_keys)} changed keys: {', '.join(sorted(changed_keys))}", flush=True)
284+
print(f"Found {len(changed_keys)} changed keys: {', '.join(sorted(changed_keys))}")
284285

285286
# Load list of available languages from index.json
286287
if not index_file.exists():
287-
print(f"Error: {index_file} not found", flush=True)
288+
print(f"Error: {index_file} not found")
288289
sys.exit(1)
289290

290291
try:
291292
languages = load_json(index_file)
292-
print(f"Loaded {len(languages)} languages", flush=True)
293+
print(f"Loaded {len(languages)} languages")
293294
except Exception:
294295
sys.exit(1)
295296

@@ -301,14 +302,19 @@ def main() -> None:
301302
lang_name = lang_info.get('name')
302303

303304
if not lang_code or not lang_name:
304-
print(f"Warning: Invalid language entry: {lang_info}", flush=True)
305+
print(f"Warning: Invalid language entry: {lang_info}")
305306
continue
306307

307308
# Skip English since it's the source language
308309
if lang_code == SOURCE_LANGUAGE:
309310
continue
310311

311-
print(f"\n[{lang_code}] {lang_name}", flush=True)
312+
# Skip languages that should not be auto-translated
313+
if lang_code in SKIP_LANGUAGES:
314+
print(f"\n[{lang_code}] {lang_name} - Skipped (manual translation only)")
315+
continue
316+
317+
print(f"\n[{lang_code}] {lang_name}")
312318

313319
# Load existing translations for this language
314320
target_file = localizations_dir / f"{lang_code}.json"
@@ -321,7 +327,7 @@ def main() -> None:
321327
if translate_language(lang_code, lang_name, keys_to_translate, en_data, existing_data, localizations_dir):
322328
translated_count += 1
323329

324-
print(f"\n✓ Done - translated {translated_count} language(s)", flush=True)
330+
print(f"\n✓ Done - translated {translated_count} language(s)")
325331

326332

327333
if __name__ == "__main__":

0 commit comments

Comments (0)