Skip to content

Commit e90a9cd

Browse files
committed
script fix
1 parent 365a2a6 commit e90a9cd

File tree

2 files changed

+47
-74
lines changed

2 files changed

+47
-74
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
<img src="monitor/api/static/logo.png" alt="MyGPU logo"/>
1414
</div>
1515

16-
> *MyGPU: Lightweight GPU Management Utility: a compact `nvidia-smi` wrapper with a clean web dashboard.*
16+
> *MyGPU: Lightweight GPU Management Utility: a compact `nvidia-smi` wrapper with an elegant web dashboard.*
1717
1818
![License](https://img.shields.io/badge/license-MIT-blue.svg)
1919
![Python](https://img.shields.io/badge/python-3.10%2B-blue)

scripts/translate.py

Lines changed: 46 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -3,109 +3,82 @@
33
import argparse
44
from llama_cpp import Llama
55

6-
# Map language codes to full English names for the system prompt
76
LANG_MAP = {
8-
"de": "German",
9-
"fr": "French",
7+
"de": "German",
8+
"fr": "French",
109
"es": "Spanish",
11-
"ja": "Japanese",
12-
"zh": "Simplified Chinese",
13-
"ru": "Russian",
14-
"pt": "Portuguese",
10+
"ja": "Japanese",
11+
"zh": "Chinese(Simplified)",
12+
"ru": "Russian",
13+
"pt": "Portuguese",
1514
"ko": "Korean",
1615
}
1716

1817
parser = argparse.ArgumentParser()
19-
parser.add_argument("--lang", type=str, required=True, help="Target language code (e.g., de, fr)")
18+
parser.add_argument("--lang", type=str, required=True)
2019
args = parser.parse_args()
21-
2220
target_lang_name = LANG_MAP.get(args.lang, "English")
2321

22+
# Path Configuration
2423
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
2524
README_PATH = os.path.join(BASE_DIR, "README.md")
2625
OUTPUT_DIR = os.path.join(BASE_DIR, "locales")
2726
OUTPUT_PATH = os.path.join(OUTPUT_DIR, f"README.{args.lang}.md")
2827
MODEL_PATH = os.path.join(BASE_DIR, "models", "aya-expanse-8b-q4_k_s.gguf")
2928

30-
# Ensure output directory exists
3129
os.makedirs(OUTPUT_DIR, exist_ok=True)
32-
33-
# Set n_ctx to 6144 as a safe middle ground for 8B model on 7GB RAM.
34-
# Added n_threads=2 to match GitHub Action runner vCPUs.
3530
llm = Llama(model_path=MODEL_PATH, n_ctx=6144, n_threads=2, verbose=False)
3631

3732
with open(README_PATH, "r", encoding="utf-8") as f:
38-
text_to_translate = f.read()
33+
original_text = f.read()
34+
35+
# --- PRE-PROCESSING: Protect Navigation Bar ---
36+
nav_match = re.search(r'(<div align="center">.*?</div>)', original_text, re.DOTALL)
37+
nav_placeholder = "[NAV_BAR_PROTECTED_BLOCK]"
38+
text_to_translate = original_text
39+
if nav_match:
40+
text_to_translate = text_to_translate.replace(nav_match.group(1), nav_placeholder)
3941

40-
# Aya Expanse specific header format system/user/chatbot turns
42+
# Refined Prompt for CJK and Technical Nuance
4143
prompt = f"""<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
42-
You are a professional technical translator specializing in software documentation (GitHub READMEs).
43-
Translate the provided README into professional developer-level {target_lang_name}.
44+
You are a professional technical translator. Translate this GitHub README into {target_lang_name}.
4445
CRITICAL RULES:
45-
1. **Badges**: Do NOT translate Markdown image syntax. Specifically, do NOT translate text inside square brackets `![...]` or parentheses `(...)` for badge lines (e.g., license, python, version badges).
46-
2. **Navigation**: Do NOT modify the top-level HTML navigation bar (`<div align="center">`).
47-
3. **Context**: Treat 'Enforcement' as 'System policy restriction' and 'Headless' as 'server without GUI'.
48-
4. **Technical Integrity**: Preserve standard terms (GPU, CLI, VRAM, SSH, Docker) in English.
49-
5. **Formatting**: Preserve all emojis and HTML/Markdown tags exactly.
50-
6. **No Talk**: Output ONLY the translated text. Do not include markdown code fences (```) around the entire output.<|END_OF_TURN_TOKEN|>
46+
1. **Badges**: Do NOT translate text inside `![...]` or `(...)` for image badges.
47+
2. **Standard Terms**: Keep terms like GPU, VRAM, CLI, API, CUDA, and Docker in English.
48+
3. **Context**:
49+
- 'Enforcement' = Policy restriction/application (JA: 制限/強制, ZH: 强制执行).
50+
- 'Headless' = Servers without a display (JA: ヘッドレス, ZH: 无头).
51+
- 'Agnostic' = Independence (JA: 非依存, ZH: 无关性).
52+
4. **Placeholders**: Return any text like '{nav_placeholder}' exactly as is.
53+
5. **Output**: ONLY the translation. No conversational filler.<|END_OF_TURN_TOKEN|>
5154
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>
5255
{text_to_translate}<|END_OF_TURN_TOKEN|>
53-
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
54-
"""
55-
# Do not translate badges. Keep them as they are.
56-
# IMPORTANT: Do not strip or modify the top-level HTML tags (like <div> or <img>) at the beginning of the file.
57-
# ONLY output the translated {target_lang_name} text. No talk, just translation.
58-
# max_tokens must be less than n_ctx (6144). 4096 leaves ~2000 tokens for README input.
59-
response = llm(
60-
prompt,
61-
max_tokens=6144,
62-
temperature=0, # Set to 0 for maximum determinism in translation
63-
stop=["<|END_OF_TURN_TOKEN|>", "<|START_OF_TURN_TOKEN|>"]
64-
)
56+
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"""
6557

58+
response = llm(prompt, max_tokens=6144, temperature=0, stop=["<|END_OF_TURN_TOKEN|>"])
6659
translated_content = response['choices'][0]['text'].strip()
6760

68-
# 1. CLEANUP: Remove markdown code fences if the LLM included them
69-
if translated_content.startswith("```"):
70-
lines = translated_content.splitlines()
71-
if lines[0].startswith("```"):
72-
lines = lines[1:]
73-
if lines and lines[-1].strip().startswith("```"):
74-
lines = lines[:-1]
75-
translated_content = "\n".join(lines).strip()
61+
# --- POST-PROCESSING ---
7662

77-
# 2. FIX PATHS: Handle relative paths for files in /locales/
78-
# We need to ensure that links to the root go up one level (../)
79-
# but links to other files in the same /locales/ folder stay relative.
63+
# 1. Restore Navigation Bar
64+
if nav_match:
65+
translated_content = translated_content.replace(nav_placeholder, nav_match.group(1))
8066

81-
# Step 1: Prepend ../ to relative paths (ignoring external links, absolute paths, anchors, or locales/)
82-
# This targets Markdown links/images text and HTML src="path"/href="path"
83-
translated_content = re.sub(r'(\[.*?\]\()(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content)
84-
translated_content = re.sub(r'((?:src|href)=")(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content)
67+
# 2. Advanced Badge Restoration (Key-based)
68+
# This handles cases where the LLM translates the URL parameters
69+
badge_keys = ["license", "python", "version", "platform", "cuda"]
70+
for key in badge_keys:
71+
# Find the original badge line for this key
72+
orig_badge = re.search(rf'(!\[.*?\]\(https://img\.shields\.io/badge/{key}.*?\))', original_text, re.I)
73+
if orig_badge:
74+
# Find and replace the translated version in the output
75+
translated_content = re.sub(rf'!\[.*?\]\(https://img\.shields\.io/badge/{key}.*?\)', orig_badge.group(1), translated_content, flags=re.I)
8576

86-
# Step 2: Handle links that point to the locales directory.
87-
# Since the translated file is ALREADY in /locales/, we strip the 'locales/' prefix
88-
# so they point to the sibling files in the same directory.
77+
# 3. Path Correction (Support single and double quotes)
78+
translated_content = re.sub(r'(\[.*?\]\()(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content)
79+
translated_content = re.sub(r'((?:src|href)=["\'])(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content)
8980
translated_content = re.sub(r'(\[.*?\]\()locales/', r'\1', translated_content)
90-
translated_content = re.sub(r'((?:src|href)=")locales/', r'\1', translated_content)
91-
92-
# 3. RESTORE BADGES: Ensure badges match the original English README exactly.
93-
# This fixes cases where the LLM translates the Alt text (e.g., ![License] -> ![Lizenz])
94-
# or slightly alters the URL.
95-
96-
# Extract all shields.io badges from the original source text
97-
original_badges = re.findall(r'(!\[.*?\]\(https://img\.shields\.io/.*?\))', text_to_translate)
98-
99-
for badge in original_badges:
100-
# Extract the URL from the original badge to use as a key
101-
match = re.search(r'\((https://img\.shields\.io/.*?)\)', badge)
102-
if match:
103-
url = match.group(1)
104-
# Replace any markdown image in the translated text that has this URL
105-
# with the exact original badge string.
106-
translated_content = re.sub(rf'!\[.*?\]\({re.escape(url)}\)', lambda m: badge, translated_content)
81+
translated_content = re.sub(r'((?:src|href)=["\'])locales/', r'\1', translated_content)
10782

10883
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
109-
f.write(translated_content)
110-
111-
print(f"Translation to {target_lang_name} complete: {OUTPUT_PATH}")
84+
f.write(translated_content)

0 commit comments

Comments
 (0)