Skip to content

Commit 55884e8

Browse files
committed
prompt fix
1 parent 910c8ed commit 55884e8

File tree

2 files changed

+20
-15
lines changed

2 files changed

+20
-15
lines changed

scripts/translate_eastern.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
llm = Llama(model_path=MODEL_PATH, n_ctx=6144, n_threads=2, verbose=False)
2626

2727
with open(README_PATH, "r", encoding="utf-8") as f:
28-
original_text = f.read()
28+
lines = f.readlines()
2929

3030
# --- PRE-PROCESSING ---
3131
protected_blocks = []
@@ -35,13 +35,16 @@ def protect_match(match):
3535
protected_blocks.append(match.group(0))
3636
return placeholder
3737

38-
text_to_translate = original_text
38+
# Manual Line Protection (User Request)
39+
# Block 0: Lines 1-15 (Nav + Logo) -> lines[0:15]
40+
protected_blocks.append("".join(lines[0:15]))
41+
# Block 1: Lines 19-66 (Badges + Gallery) -> lines[18:66]
42+
protected_blocks.append("".join(lines[18:66]))
3943

40-
# 1. Protect Navigation Bar
41-
text_to_translate = re.sub(r'(<div\s+[^>]*align=["\']center["\'][^>]*>.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL | re.IGNORECASE)
42-
# 2. Protect Logo Block
43-
text_to_translate = re.sub(r'(<div\s+[^>]*style=["\'][^"\']*text-align:\s*center[^"\']*["\'][^>]*>.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL | re.IGNORECASE)
44-
# 3. Protect ALL Images (Badges + Gallery)
44+
# Construct text: PB0 + Lines 16-18 (Quote) + PB1 + Lines 67+ (Body)
45+
text_to_translate = f"__PB_0__{''.join(lines[15:18])}__PB_1__{''.join(lines[66:])}"
46+
47+
# Protect any remaining images in the rest of the text
4548
text_to_translate = re.sub(r'(!\[[^\]\r\n]*\]\([^)\r\n]+\))', protect_match, text_to_translate)
4649

4750
# Specialized Prompt for CJK/Eastern Languages

scripts/translate_western.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
llm = Llama(model_path=MODEL_PATH, n_ctx=6144, n_threads=2, verbose=False)
2626

2727
with open(README_PATH, "r", encoding="utf-8") as f:
28-
original_text = f.read()
28+
lines = f.readlines()
2929

3030
# --- PRE-PROCESSING: Protect Sensitive Blocks ---
3131
protected_blocks = []
@@ -36,14 +36,16 @@ def protect_match(match):
3636
protected_blocks.append(match.group(0))
3737
return placeholder
3838

39-
text_to_translate = original_text
39+
# Manual Line Protection (User Request)
40+
# Block 0: Lines 1-15 (Nav + Logo) -> lines[0:15]
41+
protected_blocks.append("".join(lines[0:15]))
42+
# Block 1: Lines 19-66 (Badges + Gallery) -> lines[18:66]
43+
protected_blocks.append("".join(lines[18:66]))
4044

41-
# 1. Protect Navigation Bar
42-
text_to_translate = re.sub(r'(<div\s+[^>]*align=["\']center["\'][^>]*>.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL | re.IGNORECASE)
43-
# 2. Protect Logo Block
44-
text_to_translate = re.sub(r'(<div\s+[^>]*style=["\'][^"\']*text-align:\s*center[^"\']*["\'][^>]*>.*?</div>)', protect_match, text_to_translate, flags=re.DOTALL | re.IGNORECASE)
45-
# 3. Protect ALL Images (Badges + Gallery)
46-
# This prevents the gallery from disappearing or badges being translated
45+
# Construct text: PB0 + Lines 16-18 (Quote) + PB1 + Lines 67+ (Body)
46+
text_to_translate = f"__PB_0__{''.join(lines[15:18])}__PB_1__{''.join(lines[66:])}"
47+
48+
# Protect any remaining images in the rest of the text
4749
text_to_translate = re.sub(r'(!\[[^\]\r\n]*\]\([^)\r\n]+\))', protect_match, text_to_translate)
4850

4951
prompt = f"""<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>

0 commit comments

Comments
 (0)