|
75 | 75 | translated_content = "\n".join(lines).strip() |
76 | 76 |
|
# 2. FIX PATHS: Handle relative paths for files in /locales/
# The translated file lives in /locales/, so links to the repository root
# must go up one level (../), while links to other files in the same
# /locales/ folder must stay relative.

# Step 1: Prepend ../ to relative paths. A target is left untouched when it:
#   - starts with a URI scheme (http:, https:, mailto:, data:, ftp:, ...)
#   - is an absolute or protocol-relative path (/ or //)
#   - is an in-page anchor (#)
#   - already starts with ../
#   - points into locales/ (handled in Step 2)
#   - is empty
# This targets Markdown links/images [text](path) and HTML src="path"/href="path".
# Testing for a generic scheme instead of the literal prefix "http" keeps
# mailto:/data: links intact and no longer skips relative files whose name
# merely begins with "http".
non_relative = r'[A-Za-z][A-Za-z0-9+.-]*:|/|#|\.\./|locales/'
translated_content = re.sub(rf'(\[.*?\]\()(?!{non_relative}|\))', r'\1../', translated_content)
translated_content = re.sub(rf'((?:src|href)=")(?!{non_relative}|")', r'\1../', translated_content)

# Step 2: Handle links that point to the locales directory.
# Since the translated file is ALREADY in /locales/, we strip the 'locales/' prefix
# so they point to the sibling files in the same directory.
translated_content = re.sub(r'(\[.*?\]\()locales/', r'\1', translated_content)
translated_content = re.sub(r'((?:src|href)=")locales/', r'\1', translated_content)
81 | 91 |
|
# 3. RESTORE BADGES: Ensure badges match the original English README exactly.
# This fixes cases where the LLM translates the alt text
# (e.g., ![License] -> ![Lizenz]). Badges are looked up by their exact
# shields.io URL, so a badge whose URL the LLM altered is NOT restored.

# The alt text is matched with [^\]]* instead of .*? on purpose: a lazy .*?
# still starts at the leftmost "![" and would run across earlier images on
# the same line until it reaches the badge URL, capturing (and later
# overwriting) neighbouring images or badges.
# Group 1 is the whole badge, group 2 is its URL.
badge_matches = re.findall(
    r'(!\[[^\]]*\]\((https://img\.shields\.io/.*?)\))',
    text_to_translate,
)

# Extract all shields.io badges from the original source text
original_badges = [badge for badge, _ in badge_matches]

for badge, url in badge_matches:
    # Replace any markdown image in the translated text that has this URL
    # with the exact original badge string. A function is used as the
    # replacement so backslashes in the badge are inserted literally rather
    # than being interpreted as group references.
    translated_content = re.sub(
        rf'!\[[^\]]*\]\({re.escape(url)}\)',
        lambda m: badge,
        translated_content,
    )
96 | 107 |
|
# Persist the post-processed translation as UTF-8, replacing any existing file.
with open(OUTPUT_PATH, mode="w", encoding="utf-8") as output_file:
    output_file.write(translated_content)
|
0 commit comments