|
32 | 32 | with open(README_PATH, "r", encoding="utf-8") as f: |
33 | 33 | original_text = f.read() |
34 | 34 |
|
35 | | -# --- PRE-PROCESSING: Protect Navigation Bar --- |
36 | | -nav_match = re.search(r'(<div align="center">.*?</div>)', original_text, re.DOTALL) |
37 | | -nav_placeholder = "[NAV_BAR_PROTECTED_BLOCK]" |
| 35 | +# --- PRE-PROCESSING: Protect Sensitive Blocks --- |
| 36 | +# We replace complex blocks with placeholders so the LLM cannot mangle them. |
| 37 | +protected_blocks = [] |
| 38 | + |
| 39 | +def protect_match(match): |
| 40 | + placeholder = f"[PROTECTED_BLOCK_{len(protected_blocks)}]" |
| 41 | + protected_blocks.append(match.group(0)) |
| 42 | + return placeholder |
| 43 | + |
38 | 44 | text_to_translate = original_text |
39 | | -if nav_match: |
40 | | - text_to_translate = text_to_translate.replace(nav_match.group(1), nav_placeholder) |
| 45 | + |
| 46 | +# 1. Protect Navigation Bar (<div align="center">...</div>) |
| 47 | +text_to_translate = re.sub(r'(<div align="center">.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL) |
| 48 | +# 2. Protect Logo Block (<div style="text-align:center...>) |
| 49 | +text_to_translate = re.sub(r'(<div style="text-align:center; margin:18px 0;">.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL) |
| 50 | +# 3. Protect Badges (![...](https://img.shields.io/...)) - Prevents URL translation |
| 51 | +text_to_translate = re.sub(r'(!\[.*?\]\(https://img\.shields\.io/.*?\))', protect_match, text_to_translate) |
41 | 52 |
|
42 | 53 | # Refined Prompt for CJK and Technical Nuance |
43 | 54 | prompt = f"""<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|> |
|
49 | 60 | - 'Enforcement' = Policy restriction/application (JA: 制限/強制, ZH: 强制执行). |
50 | 61 | - 'Headless' = Servers without a display (JA: ヘッドレス, ZH: 无头). |
51 | 62 | - 'Agnostic' = Independence (JA: 非依存, ZH: 无关性). |
52 | | -4. **Placeholders**: Return any text like '{nav_placeholder}' exactly as is. |
| 63 | +4. **Placeholders**: Return any text like '[PROTECTED_BLOCK_X]' exactly as is. |
53 | 64 | 5. **Output**: ONLY the translation. No conversational filler.<|END_OF_TURN_TOKEN|> |
54 | 65 | <|START_OF_TURN_TOKEN|><|USER_TOKEN|> |
55 | 66 | {text_to_translate}<|END_OF_TURN_TOKEN|> |
|
60 | 71 |
|
61 | 72 | # --- POST-PROCESSING --- |
62 | 73 |
|
63 | | -# 1. Restore Navigation Bar |
64 | | -if nav_match: |
65 | | - translated_content = translated_content.replace(nav_placeholder, nav_match.group(1)) |
66 | | - |
67 | | -# 2. Advanced Badge Restoration (Key-based) |
68 | | -# This handles cases where the LLM translates the URL parameters |
69 | | -badge_keys = ["license", "python", "version", "platform", "cuda"] |
70 | | -for key in badge_keys: |
71 | | - # Find the original badge line for this key |
72 | | - orig_badge = re.search(rf'(!\[.*?\]\(https://img\.shields\.io/badge/{key}.*?\))', original_text, re.I) |
73 | | - if orig_badge: |
74 | | - # Find and replace the translated version in the output |
75 | | - translated_content = re.sub(rf'!\[.*?\]\(https://img\.shields\.io/badge/{key}.*?\)', orig_badge.group(1), translated_content, flags=re.I) |
| 74 | +# 1. Restore Protected Blocks |
| 75 | +for i, block in enumerate(protected_blocks): |
| 76 | + translated_content = translated_content.replace(f"[PROTECTED_BLOCK_{i}]", block) |
76 | 77 |
|
77 | | -# 3. Path Correction (Support single and double quotes) |
| 78 | +# 2. Path Correction (Support single and double quotes) |
78 | 79 | translated_content = re.sub(r'(\[.*?\]\()(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content) |
79 | 80 | translated_content = re.sub(r'((?:src|href)=["\'])(?!(?:http|/|#|\.\./|locales/))', r'\1../', translated_content) |
80 | 81 | translated_content = re.sub(r'(\[.*?\]\()locales/', r'\1', translated_content) |
|
0 commit comments