script fix

DataBoySu · DataBoySu · commit e90a9cd82997 · 2025-12-26T15:56:27.000+05:30
diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@
   <img src="monitor/api/static/logo.png" alt="MyGPU logo"/>
 </div>
 
-> *MyGPU: Lightweight GPU Management Utility: a compact `nvidia-smi` wrapper with a clean web dashboard.*
+> *MyGPU: Lightweight GPU Management Utility: a compact `nvidia-smi` wrapper with an elegant web dashboard.*
 
 ![License](https://img.shields.io/badge/license-MIT-blue.svg)
 ![Python](https://img.shields.io/badge/python-3.10%2B-blue)
diff --git a/scripts/translate.py b/scripts/translate.py
@@ -3,109 +3,82 @@
 import argparse
 from llama_cpp import Llama
 
-# Map language codes to full English names for the system prompt
 LANG_MAP = {
-    "de": "German",
-    "fr": "French",
+    "de": "German", 
+    "fr": "French", 
     "es": "Spanish",
-    "ja": "Japanese",
-    "zh": "Simplified Chinese",
-    "ru": "Russian",
-    "pt": "Portuguese",
+    "ja": "Japanese", 
+    "zh": "Chinese(Simplified)",
+    "ru": "Russian", 
+    "pt": "Portuguese", 
     "ko": "Korean",
 }
 
 parser = argparse.ArgumentParser()
-parser.add_argument("--lang", type=str, required=True, help="Target language code (e.g., de, fr)")
+parser.add_argument("--lang", type=str, required=True)
 args = parser.parse_args()
-
 target_lang_name = LANG_MAP.get(args.lang, "English")
 
+# Path Configuration
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 README_PATH = os.path.join(BASE_DIR, "README.md")
 OUTPUT_DIR = os.path.join(BASE_DIR, "locales")
 OUTPUT_PATH = os.path.join(OUTPUT_DIR, f"README.{args.lang}.md")
 MODEL_PATH = os.path.join(BASE_DIR, "models", "aya-expanse-8b-q4_k_s.gguf")
 
-# Ensure output directory exists
 os.makedirs(OUTPUT_DIR, exist_ok=True)
-
-# Set n_ctx to 6144 as a safe middle ground for 8B model on 7GB RAM.
-# Added n_threads=2 to match GitHub Action runner vCPUs.
 llm = Llama(model_path=MODEL_PATH, n_ctx=6144, n_threads=2, verbose=False)
 
 with open(README_PATH, "r", encoding="utf-8") as f:
-    text_to_translate = f.read()
+    original_text = f.read()
+
+# --- PRE-PROCESSING: Protect Navigation Bar ---
+nav_match = re.search(r'(<div align="center">.*?</div>)', original_text, re.DOTALL)
+nav_placeholder = "[NAV_BAR_PROTECTED_BLOCK]"
+text_to_translate = original_text
+if nav_match:
+    text_to_translate = text_to_translate.replace(nav_match.group(1), nav_placeholder)
 
-# Aya Expanse specific header format system/user/chatbot turns
+# Refined Prompt for CJK and Technical Nuance
 prompt = f"""<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
-You are a professional technical translator specializing in software documentation (GitHub READMEs).
-Translate the provided README into professional developer-level {target_lang_name}.
+You are a professional technical translator. Translate this GitHub README into {target_lang_name}.
 CRITICAL RULES:
-1. **Badges**: Do NOT translate Markdown image syntax. Specifically, do NOT translate text inside square brackets `![...]` or parentheses `(...)` for badge lines (e.g., license, python, version badges).
-2. **Navigation**: Do NOT modify the top-level HTML navigation bar (`<div align="center">`).
-3. **Context**: Treat 'Enforcement' as 'System policy restriction' and 'Headless' as 'server without GUI'.
-4. **Technical Integrity**: Preserve standard terms (GPU, CLI, VRAM, SSH, Docker) in English.
-5. **Formatting**: Preserve all emojis and HTML/Markdown tags exactly.
-6. **No Talk**: Output ONLY the translated text. Do not include markdown code fences (```) around the entire output.<|END_OF_TURN_TOKEN|>
+1. **Badges**: Do NOT translate text inside `![...]` or `(...)` for image badges.
+2. **Standard Terms**: Keep terms like GPU, VRAM, CLI, API, CUDA, and Docker in English.
+3. **Context**: 
+   - 'Enforcement' = Policy restriction/application (JA: 制限/強制, ZH: 强制执行).
+   - 'Headless' = Servers without a display (JA: ヘッドレス, ZH: 无头).
+   - 'Agnostic' = Independence (JA: 非依存, ZH: 无关性).
+4. **Placeholders**: Return any text like '{nav_placeholder}' exactly as is.
+5. **Output**: ONLY the translation. No conversational filler.<|END_OF_TURN_TOKEN|>
 <|START_OF_TURN_TOKEN|><|USER_TOKEN|>
 {text_to_translate}<|END_OF_TURN_TOKEN|>
-<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
-"""
-# Do not translate translate badges. Keep them as it is.
-# IMPORTANT: Do not strip or modify the top-level HTML tags (like <div> or <img>) at the beginning of the file.
-# ONLY output the translated {target_lang_name} text. No talk, just translation.
-# max_tokens must be less than n_ctx (6144). 4096 leaves ~2000 tokens for README input.
-response = llm(
-    prompt, 
-    max_tokens=6144, 
-    temperature=0, # Set to 0 for maximum determinism in translation
-    stop=["<|END_OF_TURN_TOKEN|>", "<|START_OF_TURN_TOKEN|>"]
-)
+<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"""
 
+response = llm(prompt, max_tokens=6144, temperature=0, stop=["<|END_OF_TURN_TOKEN|>"])
 translated_content = response['choices'][0]['text'].strip()
 
-# 1. CLEANUP: Remove markdown code fences if the LLM included them
-if translated_content.startswith("```"):
-    lines = translated_content.splitlines()
-    if lines[0].startswith("```"):
-        lines = lines[1:]
-    if lines and lines[-1].strip().startswith("```"):
-        lines = lines[:-1]
-    translated_content = "\n".join(lines).strip()
+# --- POST-PROCESSING ---
 
-# 2. FIX PATHS: Handle relative paths for files in /locales/
-# We need to ensure that links to the root go up one level (../) 
-# but links to other files in the same /locales/ folder stay relative.
+# 1. Restore Navigation Bar
+if nav_match:
+    translated_content = translated_content.replace(nav_placeholder, nav_match.group(1))
 
-# Step 1: Prepend ../ to relative paths (ignoring external links, absolute paths, anchors, or locales/)
-# This targets Markdown links/images text and HTML src="path"/href="path"
-translated_content = re.sub(r'(\[.*?\]\()(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content)
-translated_content = re.sub(r'((?:src|href)=")(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content)
+# 2. Advanced Badge Restoration (Key-based)
+# Handles cases where the LLM translates the badge alt text or alters the URL after the badge key
+badge_keys = ["license", "python", "version", "platform", "cuda"]
+for key in badge_keys:
+    # Find the original badge line for this key
+    orig_badge = re.search(rf'(!\[.*?\]\(https://img\.shields\.io/badge/{key}.*?\))', original_text, re.I)
+    if orig_badge:
+        # Find and replace the translated version in the output
+        translated_content = re.sub(rf'!\[.*?\]\(https://img\.shields\.io/badge/{key}.*?\)', orig_badge.group(1), translated_content, flags=re.I)
 
-# Step 2: Handle links that point to the locales directory.
-# Since the translated file is ALREADY in /locales/, we strip the 'locales/' prefix
-# so they point to the sibling files in the same directory.
+# 3. Path Correction: prepend ../ to relative paths, strip locales/ prefix (src/href accept single or double quotes)
+translated_content = re.sub(r'(\[.*?\]\()(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content)
+translated_content = re.sub(r'((?:src|href)=["\'])(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content)
 translated_content = re.sub(r'(\[.*?\]\()locales/', r'\1', translated_content)
-translated_content = re.sub(r'((?:src|href)=")locales/', r'\1', translated_content)
-
-# 3. RESTORE BADGES: Ensure badges match the original English README exactly.
-# This fixes cases where the LLM translates the Alt text (e.g., ![License] -> ![Lizenz])
-# or slightly alters the URL.
-
-# Extract all shields.io badges from the original source text
-original_badges = re.findall(r'(!\[.*?\]\(https://img\.shields\.io/.*?\))', text_to_translate)
-
-for badge in original_badges:
-    # Extract the URL from the original badge to use as a key
-    match = re.search(r'\((https://img\.shields\.io/.*?)\)', badge)
-    if match:
-        url = match.group(1)
-        # Replace any markdown image in the translated text that has this URL
-        # with the exact original badge string.
-        translated_content = re.sub(rf'!\[.*?\]\({re.escape(url)}\)', lambda m: badge, translated_content)
+translated_content = re.sub(r'((?:src|href)=["\'])locales/', r'\1', translated_content)
 
 with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
-    f.write(translated_content)
-
-print(f"Translation to {target_lang_name} complete: {OUTPUT_PATH}")
+    f.write(translated_content)