|
27 | 27 | with open(README_PATH, "r", encoding="utf-8") as f: |
28 | 28 | original_text = f.read() |
29 | 29 |
|
| 30 | +# --- PRE-PROCESSING --- |
| 31 | +protected_blocks = [] |
| 32 | + |
| 33 | +def protect_match(match): |
| 34 | + placeholder = f"__PB_{len(protected_blocks)}__" |
| 35 | + protected_blocks.append(match.group(0)) |
| 36 | + return placeholder |
| 37 | + |
30 | 38 | text_to_translate = original_text |
| 39 | +text_to_translate = re.sub(r'(<!--\s*b\s*-->.*?<!--\s*e\s*-->)', protect_match, text_to_translate, flags=re.DOTALL) |
31 | 40 |
|
32 | 41 | # Specialized Prompt for CJK/Eastern Languages |
33 | 42 | prompt = f"""<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|> |
34 | 43 | You are a professional technical {target_lang_name} translator. |
35 | 44 | Your task is to translate the README into {target_lang_name}. |
36 | 45 | Keep the markdown format and HTML tags the same. |
37 | 46 | CRITICAL INSTRUCTIONS: |
38 | | -1. **Preserve HTML**: Do NOT translate or modify any HTML tags (lines starting with `<`). Output them exactly as they are in the source. |
39 | | -2. **Preserve Images**: Do NOT translate or modify any Markdown images or badges (lines starting with `![`). Output them exactly as they are in the source. Do NOT add spaces inside URLs. |
40 | | -3. **Translation**: Translate only the text content (paragraphs, headers, lists) into {target_lang_name}. |
41 | | -4. **Technical Terms**: Keep terms like GPU, CLI, VRAM, SSH, Docker, API, CUDA in English. |
42 | | -5. **Context**: |
| 47 | +1. **Placeholders**: Keep `__PB_0__`, `__PB_1__` etc. exactly as is. These are protected blocks. |
| 48 | +2. **Preserve HTML**: Do NOT translate or modify any HTML tags (lines starting with `<`). Output them exactly as they are in the source. |
| 49 | +3. **Preserve Images**: Do NOT translate or modify any Markdown images or badges (lines starting with `![`). Output them exactly as they are in the source. Do NOT add spaces inside URLs. |
| 50 | +4. **Translation**: Translate only the text content (paragraphs, headers, lists) into {target_lang_name}. |
| 51 | +5. **Technical Terms**: Keep terms like GPU, CLI, VRAM, SSH, Docker, API, CUDA in English. |
| 52 | +6. **Context**: |
43 | 53 | - 'Enforcement' = Policy restriction (e.g., JA: 制限/強制). |
44 | 54 | - 'Headless' = Server without GUI/display. |
45 | 55 | - 'Agnostic' = Hardware Independence (JA: 非依存, ZH: 无关性). |
46 | | -6. **No Conversational Text**: Output only the final Markdown file content. No "Here is the translation" or code fences. |
| 56 | +7. **No Conversational Text**: Output only the final Markdown file content. No "Here is the translation" or code fences. |
47 | 57 | <|END_OF_TURN_TOKEN|> |
48 | 58 | <|START_OF_TURN_TOKEN|><|USER_TOKEN|> |
49 | 59 | {text_to_translate}<|END_OF_TURN_TOKEN|> |
|
62 | 72 | lines = lines[:-1] |
63 | 73 | translated_content = "\n".join(lines).strip() |
64 | 74 |
|
| 75 | +# Restore Protected Blocks |
| 76 | +for i, block in enumerate(protected_blocks): |
| 77 | + placeholder = f"__PB_{i}__" |
| 78 | + translated_content = translated_content.replace(placeholder, block) |
| 79 | + |
65 | 80 | # 2. Path Correction |
66 | 81 | # Prepend ../ to relative paths |
67 | 82 | translated_content = re.sub(r'(\[.*?\]\()(?!(?:http|/|#|\.\./))', r'\1../', translated_content) |
|
0 commit comments