Skip to content

Commit 5801cca

Browse files
committed
prompt fix
1 parent 60bf7fa commit 5801cca

File tree

3 files changed

+20
-30
lines changed

3 files changed

+20
-30
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
<a href="locales/README.zh.md">🇨🇳 中文</a> |
88
<a href="locales/README.pt.md">🇵🇹 Português</a> |
99
<a href="locales/README.ko.md">🇰🇷 한국어</a> |
10-
<a href="README.hi.md">🇮🇳 Hindi</a>
10+
<a href="locales/README.hi.md">🇮🇳 हिंदी</a>
1111
</div>
1212

1313
<div style="text-align:center; margin:18px 0;">

scripts/translate_eastern.py

Lines changed: 9 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
llm = Llama(model_path=MODEL_PATH, n_ctx=6144, n_threads=2, verbose=False)
2626

2727
with open(README_PATH, "r", encoding="utf-8") as f:
28-
lines = f.readlines()
28+
original_text = f.read()
2929

3030
# --- PRE-PROCESSING ---
3131
protected_blocks = []
@@ -35,17 +35,16 @@ def protect_match(match):
3535
protected_blocks.append(match.group(0))
3636
return placeholder
3737

38-
# Manual Line Protection (User Request)
39-
# Block 0: Lines 1-15 (Nav + Logo) -> lines[0:15]
40-
protected_blocks.append("".join(lines[0:15]))
41-
# Block 1: Lines 19-66 (Badges + Gallery) -> lines[18:66]
42-
protected_blocks.append("".join(lines[18:66]))
38+
text_to_translate = original_text
4339

44-
# Construct text: PB0 + Lines 16-18 (Quote) + PB1 + Lines 67+ (Body)
45-
text_to_translate = f"__PB_0__{''.join(lines[15:18])}__PB_1__{''.join(lines[66:])}"
46-
47-
# Protect any remaining images in the rest of the text
40+
# 1. Protect Navigation Bar (Whole Block)
41+
text_to_translate = re.sub(r'(<div\s+align=["\']center["\']\s*>.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL | re.IGNORECASE)
42+
# 2. Protect Logo (Whole Block)
43+
text_to_translate = re.sub(r'(<div\s+style=["\'][^"]*text-align:center[^"]*["\']\s*>.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL | re.IGNORECASE)
44+
# 3. Protect Markdown Images (Badges, Screenshots)
4845
text_to_translate = re.sub(r'(!\[[^\]\r\n]*\]\([^)\r\n]+\))', protect_match, text_to_translate)
46+
# 4. Protect HTML Tags (Preserves Gallery structure <details>, <summary>, <img> but exposes text)
47+
text_to_translate = re.sub(r'(<[^>]+>)', protect_match, text_to_translate)
4948

5049
# Specialized Prompt for CJK/Eastern Languages
5150
prompt = f"""<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
@@ -99,10 +98,6 @@ def protect_match(match):
9998
translated_content = block + "\n\n" + translated_content
10099

101100
# 4. Path Correction
102-
# Remove 'locales/' hallucination
103-
translated_content = re.sub(r'(\[.*?\]\()locales/', r'\1', translated_content)
104-
translated_content = re.sub(r'((?:src|href)=["\'])locales/', r'\1', translated_content)
105-
106101
# Prepend ../ to relative paths
107102
translated_content = re.sub(r'(\[.*?\]\()(?!(?:http|/|#|\.\./))', r'\1../', translated_content)
108103
translated_content = re.sub(r'((?:src|href)=["\'])(?!(?:http|/|#|\.\./))', r'\1../', translated_content)

scripts/translate_western.py

Lines changed: 10 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
llm = Llama(model_path=MODEL_PATH, n_ctx=6144, n_threads=2, verbose=False)
2626

2727
with open(README_PATH, "r", encoding="utf-8") as f:
28-
lines = f.readlines()
28+
original_text = f.read()
2929

3030
# --- PRE-PROCESSING: Protect Sensitive Blocks ---
3131
protected_blocks = []
@@ -36,20 +36,19 @@ def protect_match(match):
3636
protected_blocks.append(match.group(0))
3737
return placeholder
3838

39-
# Manual Line Protection (User Request)
40-
# Block 0: Lines 1-15 (Nav + Logo) -> lines[0:15]
41-
protected_blocks.append("".join(lines[0:15]))
42-
# Block 1: Lines 19-66 (Badges + Gallery) -> lines[18:66]
43-
protected_blocks.append("".join(lines[18:66]))
39+
text_to_translate = original_text
4440

45-
# Construct text: PB0 + Lines 16-18 (Quote) + PB1 + Lines 67+ (Body)
46-
text_to_translate = f"__PB_0__{''.join(lines[15:18])}__PB_1__{''.join(lines[66:])}"
47-
48-
# Protect any remaining images in the rest of the text
41+
# 1. Protect Navigation Bar (Whole Block)
42+
text_to_translate = re.sub(r'(<div\s+align=["\']center["\']\s*>.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL | re.IGNORECASE)
43+
# 2. Protect Logo (Whole Block)
44+
text_to_translate = re.sub(r'(<div\s+style=["\'][^"]*text-align:center[^"]*["\']\s*>.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL | re.IGNORECASE)
45+
# 3. Protect Markdown Images (Badges, Screenshots)
4946
text_to_translate = re.sub(r'(!\[[^\]\r\n]*\]\([^)\r\n]+\))', protect_match, text_to_translate)
47+
# 4. Protect HTML Tags (Preserves Gallery structure <details>, <summary>, <img> but exposes text)
48+
text_to_translate = re.sub(r'(<[^>]+>)', protect_match, text_to_translate)
5049

5150
prompt = f"""<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
52-
You are a professional technical translator. Translate the provided README into professional developer-level {target_lang_name}.
51+
You are a professional technical {target_lang_name} translator. Translate the provided README into professional developer-level {target_lang_name}.
5352
CRITICAL RULES:
5453
1. **Structure**: Keep the layout exactly the same.
5554
2. **Placeholders**: You will see placeholders like __PB_0__, __PB_1__. These are images or layout blocks. KEEP THEM EXACTLY AS IS. Do not move or translate them.
@@ -98,10 +97,6 @@ def protect_match(match):
9897
translated_content = block + "\n\n" + translated_content
9998

10099
# 4. Path Correction
101-
# Remove 'locales/' hallucination
102-
translated_content = re.sub(r'(\[.*?\]\()locales/', r'\1', translated_content)
103-
translated_content = re.sub(r'((?:src|href)=["\'])locales/', r'\1', translated_content)
104-
105100
# Prepend ../ to relative paths
106101
translated_content = re.sub(r'(\[.*?\]\()(?!(?:http|/|#|\.\./))', r'\1../', translated_content)
107102
translated_content = re.sub(r'((?:src|href)=["\'])(?!(?:http|/|#|\.\./))', r'\1../', translated_content)

0 commit comments

Comments
 (0)