Skip to content

Commit 912d3af

Browse files
committed
feat: Separate system and user prompts for all LLM providers
- Add PromptPair NamedTuple to return system/user prompts separately
- Update OllamaProvider to use 'system' field in payload
- Update OpenAICompatibleProvider to include system message in messages array
- Update GeminiProvider to use 'systemInstruction' field
- Refactor translator.py and subtitle_translator.py to use new prompt structure
1 parent cb1c47c commit 912d3af

File tree

6 files changed

+157
-84
lines changed

6 files changed

+157
-84
lines changed

prompts/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
11
"""
22
Prompts module for TranslateBookWithLLM
33
"""
4+
from prompts.prompts import (
5+
PromptPair,
6+
generate_translation_prompt,
7+
generate_subtitle_block_prompt,
8+
)
9+
10+
__all__ = [
11+
"PromptPair",
12+
"generate_translation_prompt",
13+
"generate_subtitle_block_prompt",
14+
]

prompts/prompts.py

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1-
from typing import List, Tuple
1+
from typing import List, Tuple, NamedTuple
22
from src.config import TRANSLATE_TAG_IN, TRANSLATE_TAG_OUT, INPUT_TAG_IN, INPUT_TAG_OUT
33

44

5+
class PromptPair(NamedTuple):
6+
"""A pair of system and user prompts for LLM translation."""
7+
system: str
8+
user: str
9+
10+
511
# ============================================================================
612
# SHARED PROMPT SECTIONS
713
# ============================================================================
@@ -88,7 +94,7 @@ def generate_translation_prompt(
8894
translate_tag_in: str = TRANSLATE_TAG_IN,
8995
translate_tag_out: str = TRANSLATE_TAG_OUT,
9096
fast_mode: bool = False
91-
) -> str:
97+
) -> PromptPair:
9298
"""
9399
Generate the translation prompt with all contextual elements.
94100
@@ -104,7 +110,7 @@ def generate_translation_prompt(
104110
fast_mode: If True, excludes placeholder preservation instructions (for pure text translation)
105111
106112
Returns:
107-
str: The complete prompt formatted for translation
113+
PromptPair: A named tuple with 'system' and 'user' prompts
108114
"""
109115
# Get target-language-specific example text for output format
110116
example_texts = {
@@ -134,8 +140,8 @@ def generate_translation_prompt(
134140
example_format=example_format_text
135141
)
136142

137-
# PROMPT - can be edited for custom usages
138-
role_and_instructions_block = f"""You are a professional {target_language} translator and writer.
143+
# SYSTEM PROMPT - Role and instructions (stable across requests)
144+
system_prompt = f"""You are a professional {target_language} translator and writer.
139145
140146
# CRITICAL: TARGET LANGUAGE IS {target_language.upper()}
141147
@@ -169,23 +175,20 @@ def generate_translation_prompt(
169175
Your entire translation output must be written in {target_language}.
170176
Do NOT write in {source_language} or any other language - ONLY {target_language.upper()}.
171177
172-
{output_format_section}
173-
"""
178+
{output_format_section}"""
174179

180+
# USER PROMPT - Context and content to translate (varies per request)
175181
previous_translation_block_text = ""
176182
if previous_translation_context and previous_translation_context.strip():
177-
previous_translation_block_text = f"""
178-
179-
# CONTEXT - Previous Paragraph
183+
previous_translation_block_text = f"""# CONTEXT - Previous Paragraph
180184
181185
For consistency and natural flow, here's what came immediately before:
182186
183187
{previous_translation_context}
184188
185189
"""
186190

187-
text_to_translate_block = f"""
188-
# TEXT TO TRANSLATE
191+
user_prompt = f"""{previous_translation_block_text}# TEXT TO TRANSLATE
189192
190193
{INPUT_TAG_IN}
191194
{main_content}
@@ -200,13 +203,7 @@ def generate_translation_prompt(
200203
201204
Provide your translation now:"""
202205

203-
parts = [part.strip() for part in [
204-
role_and_instructions_block,
205-
previous_translation_block_text,
206-
text_to_translate_block
207-
] if part]
208-
209-
return "\n\n".join(parts).strip()
206+
return PromptPair(system=system_prompt.strip(), user=user_prompt.strip())
210207

211208

212209
def generate_subtitle_block_prompt(
@@ -217,7 +214,7 @@ def generate_subtitle_block_prompt(
217214
translate_tag_in: str = TRANSLATE_TAG_IN,
218215
translate_tag_out: str = TRANSLATE_TAG_OUT,
219216
custom_instructions: str = ""
220-
) -> str:
217+
) -> PromptPair:
221218
"""
222219
Generate translation prompt for multiple subtitle blocks with index markers.
223220
@@ -231,7 +228,7 @@ def generate_subtitle_block_prompt(
231228
custom_instructions: Additional custom translation instructions
232229
233230
Returns:
234-
str: The complete prompt formatted for subtitle block translation
231+
PromptPair: A named tuple with 'system' and 'user' prompts
235232
"""
236233
# Build the output format section outside the f-string to avoid backslash issues in Python 3.11
237234
subtitle_additional_rules = "\n6. Each subtitle has an index marker: [index]text - PRESERVE these markers exactly\n7. Maintain line breaks between indexed subtitles"
@@ -255,8 +252,8 @@ def generate_subtitle_block_prompt(
255252
{custom_instructions.strip()}
256253
"""
257254

258-
# Enhanced instructions for subtitle translation
259-
role_and_instructions_block = f"""You are a professional {target_language} subtitle translator and dialogue adaptation specialist.
255+
# SYSTEM PROMPT - Role and instructions for subtitle translation
256+
system_prompt = f"""You are a professional {target_language} subtitle translator and dialogue adaptation specialist.
260257
261258
# CRITICAL: TARGET LANGUAGE IS {target_language.upper()}
262259
@@ -286,15 +283,12 @@ def generate_subtitle_block_prompt(
286283
Your entire subtitle translation must be written in {target_language}.
287284
Do NOT write in {source_language} or any other language - ONLY {target_language.upper()}.
288285
289-
{subtitle_output_format_section}
290-
"""
286+
{subtitle_output_format_section}"""
291287

292-
# Previous translation context
288+
# USER PROMPT - Context and subtitles to translate
293289
previous_translation_block_text = ""
294290
if previous_translation_block and previous_translation_block.strip():
295-
previous_translation_block_text = f"""
296-
297-
# CONTEXT - Previous Subtitle Block
291+
previous_translation_block_text = f"""# CONTEXT - Previous Subtitle Block
298292
299293
For continuity and consistency, here's the previous subtitle block:
300294
@@ -308,8 +302,7 @@ def generate_subtitle_block_prompt(
308302
# Join subtitles outside f-string to avoid Python 3.11 backslash issues
309303
formatted_subtitles_text = "\n".join(formatted_subtitles)
310304

311-
text_to_translate_block = f"""
312-
# SUBTITLES TO TRANSLATE
305+
user_prompt = f"""{previous_translation_block_text}# SUBTITLES TO TRANSLATE
313306
314307
{INPUT_TAG_IN}
315308
{formatted_subtitles_text}
@@ -325,10 +318,4 @@ def generate_subtitle_block_prompt(
325318
326319
Provide your translation now:"""
327320

328-
parts = [part.strip() for part in [
329-
role_and_instructions_block,
330-
previous_translation_block_text,
331-
text_to_translate_block
332-
] if part]
333-
334-
return "\n".join(parts).strip()
321+
return PromptPair(system=system_prompt.strip(), user=user_prompt.strip())

src/core/llm_client.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,29 +29,30 @@ def _get_provider(self) -> LLMProvider:
2929
self._provider = create_llm_provider(self.provider_type, **self.provider_kwargs)
3030
return self._provider
3131

32-
async def make_request(self, prompt: str, model: Optional[str] = None,
33-
timeout: int = None) -> Optional[str]:
32+
async def make_request(self, prompt: str, model: Optional[str] = None,
33+
timeout: int = None, system_prompt: Optional[str] = None) -> Optional[str]:
3434
"""
3535
Make a request to the LLM API with error handling and retries
36-
36+
3737
Args:
38-
prompt: The prompt to send
38+
prompt: The user prompt to send (content to process)
3939
model: Model to use (defaults to instance model)
4040
timeout: Request timeout in seconds
41-
41+
system_prompt: Optional system prompt (role/instructions)
42+
4243
Returns:
4344
Raw response text or None if failed
4445
"""
4546
provider = self._get_provider()
46-
47+
4748
# Update model if specified
4849
if model:
4950
provider.model = model
50-
51+
5152
if timeout:
52-
return await provider.generate(prompt, timeout)
53+
return await provider.generate(prompt, timeout, system_prompt=system_prompt)
5354
else:
54-
return await provider.generate(prompt)
55+
return await provider.generate(prompt, system_prompt=system_prompt)
5556

5657
def extract_translation(self, response: str) -> Optional[str]:
5758
"""

src/core/llm_providers.py

Lines changed: 81 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,15 @@ class ContextOverflowError(Exception):
2222

2323
class LLMProvider(ABC):
2424
"""Abstract base class for LLM providers"""
25-
25+
2626
def __init__(self, model: str):
2727
self.model = model
2828
self._compiled_regex = re.compile(
29-
rf"{re.escape(TRANSLATE_TAG_IN)}(.*?){re.escape(TRANSLATE_TAG_OUT)}",
29+
rf"{re.escape(TRANSLATE_TAG_IN)}(.*?){re.escape(TRANSLATE_TAG_OUT)}",
3030
re.DOTALL
3131
)
3232
self._client = None
33-
33+
3434
async def _get_client(self) -> httpx.AsyncClient:
3535
"""Get or create a persistent HTTP client with connection pooling"""
3636
if self._client is None:
@@ -39,16 +39,27 @@ async def _get_client(self) -> httpx.AsyncClient:
3939
timeout=httpx.Timeout(REQUEST_TIMEOUT)
4040
)
4141
return self._client
42-
42+
4343
async def close(self):
4444
"""Close the HTTP client"""
4545
if self._client:
4646
await self._client.aclose()
4747
self._client = None
48-
48+
4949
@abstractmethod
50-
async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT) -> Optional[str]:
51-
"""Generate text from prompt"""
50+
async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
51+
system_prompt: Optional[str] = None) -> Optional[str]:
52+
"""
53+
Generate text from prompt.
54+
55+
Args:
56+
prompt: The user prompt (content to process)
57+
timeout: Request timeout in seconds
58+
system_prompt: Optional system prompt (role/instructions)
59+
60+
Returns:
61+
Generated text or None if failed
62+
"""
5263
pass
5364

5465
def extract_translation(self, response: str) -> Optional[str]:
@@ -134,9 +145,20 @@ def __init__(self, api_endpoint: str = API_ENDPOINT, model: str = DEFAULT_MODEL,
134145
self.api_endpoint = api_endpoint
135146
self.context_window = context_window
136147
self.log_callback = log_callback
137-
138-
async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT) -> Optional[str]:
139-
"""Generate text using Ollama API"""
148+
149+
async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
150+
system_prompt: Optional[str] = None) -> Optional[str]:
151+
"""
152+
Generate text using Ollama API.
153+
154+
Args:
155+
prompt: The user prompt (content to translate)
156+
timeout: Request timeout in seconds
157+
system_prompt: Optional system prompt (role/instructions)
158+
159+
Returns:
160+
Generated text or None if failed
161+
"""
140162
payload = {
141163
"model": self.model,
142164
"prompt": prompt,
@@ -147,6 +169,10 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT) -> Optiona
147169
}
148170
}
149171

172+
# Add system prompt if provided (Ollama supports 'system' field)
173+
if system_prompt:
174+
payload["system"] = system_prompt
175+
150176
client = await self._get_client()
151177
for attempt in range(MAX_TRANSLATION_ATTEMPTS):
152178
try:
@@ -254,21 +280,38 @@ async def get_model_context_size(self) -> int:
254280

255281
class OpenAICompatibleProvider(LLMProvider):
256282
"""OpenAI compatible API provider"""
257-
283+
258284
def __init__(self, api_endpoint: str, model: str, api_key: Optional[str] = None):
259285
super().__init__(model)
260286
self.api_endpoint = api_endpoint
261287
self.api_key = api_key
262-
263-
async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT) -> Optional[str]:
264-
"""Generate text using an OpenAI compatible API"""
288+
289+
async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
290+
system_prompt: Optional[str] = None) -> Optional[str]:
291+
"""
292+
Generate text using an OpenAI compatible API.
293+
294+
Args:
295+
prompt: The user prompt (content to translate)
296+
timeout: Request timeout in seconds
297+
system_prompt: Optional system prompt (role/instructions)
298+
299+
Returns:
300+
Generated text or None if failed
301+
"""
265302
headers = {"Content-Type": "application/json"}
266303
if self.api_key:
267304
headers["Authorization"] = f"Bearer {self.api_key}"
268305

306+
# Build messages array with optional system prompt
307+
messages = []
308+
if system_prompt:
309+
messages.append({"role": "system", "content": system_prompt})
310+
messages.append({"role": "user", "content": prompt})
311+
269312
payload = {
270313
"model": self.model,
271-
"messages": [{"role": "user", "content": prompt}],
314+
"messages": messages,
272315
"stream": False,
273316
}
274317

@@ -372,15 +415,27 @@ async def get_available_models(self) -> list[dict]:
372415
print(f"Error fetching Gemini models: {e}")
373416
return []
374417

375-
async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT) -> Optional[str]:
376-
"""Generate text using Gemini API"""
418+
async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
419+
system_prompt: Optional[str] = None) -> Optional[str]:
420+
"""
421+
Generate text using Gemini API.
422+
423+
Args:
424+
prompt: The user prompt (content to translate)
425+
timeout: Request timeout in seconds
426+
system_prompt: Optional system prompt (role/instructions)
427+
428+
Returns:
429+
Generated text or None if failed
430+
"""
377431
headers = {
378432
"Content-Type": "application/json",
379433
"x-goog-api-key": self.api_key
380434
}
381-
435+
382436
payload = {
383437
"contents": [{
438+
"role": "user",
384439
"parts": [{
385440
"text": prompt
386441
}]
@@ -390,6 +445,14 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT) -> Optiona
390445
"maxOutputTokens": 2048
391446
}
392447
}
448+
449+
# Add system instruction if provided (Gemini API supports systemInstruction field)
450+
if system_prompt:
451+
payload["systemInstruction"] = {
452+
"parts": [{
453+
"text": system_prompt
454+
}]
455+
}
393456

394457
# Debug logs removed - uncomment if needed for troubleshooting
395458
# print(f"[DEBUG] Gemini API URL: {self.api_endpoint}")

0 commit comments

Comments (0)