Skip to content

Commit 8cf9db0

Browse files
committed
fix prompt system
1 parent 215b1e7 commit 8cf9db0

File tree

4 files changed

+50
-223
lines changed

4 files changed

+50
-223
lines changed

prompts/examples/technical_cache.json

Lines changed: 0 additions & 42 deletions
This file was deleted.
Lines changed: 26 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,24 @@
11
"""
2-
Dynamic technical example generator for translation prompts.
2+
Static technical example for translation prompts.
33
4-
This module generates simple, technical examples on-demand using the LLM.
5-
These examples demonstrate WHAT to preserve (placeholders) with simple
6-
sentences that present no translation difficulty.
4+
This module provides a simple, static English example demonstrating
5+
placeholder preservation. No LLM generation is used to avoid random errors.
76
87
For examples showing HOW to translate idiomatically (cultural adaptation,
98
avoiding literal translation), see cultural_examples.py.
109
"""
1110

12-
import asyncio
13-
import json
14-
from pathlib import Path
1511
from typing import Dict, Optional, Any
1612

1713
from .constants import TAG0, TAG1
1814

1915

20-
# Cache file location
21-
CACHE_FILE = Path(__file__).parent / "technical_cache.json"
22-
23-
# Simple source template - easy to translate, focus on technical preservation
24-
PLACEHOLDER_TEMPLATE_EN = f"This is {TAG0}important{TAG1} text."
25-
26-
27-
def _load_cache() -> Dict[str, Dict[str, Any]]:
28-
"""Load cached examples from file."""
29-
if CACHE_FILE.exists():
30-
try:
31-
with open(CACHE_FILE, "r", encoding="utf-8") as f:
32-
return json.load(f)
33-
except (json.JSONDecodeError, IOError):
34-
return {}
35-
return {}
36-
37-
38-
def _save_cache(cache: Dict[str, Dict[str, Any]]) -> None:
39-
"""Save cache to file."""
40-
try:
41-
with open(CACHE_FILE, "w", encoding="utf-8") as f:
42-
json.dump(cache, f, ensure_ascii=False, indent=2)
43-
except IOError as e:
44-
print(f"[WARNING] Could not save technical cache: {e}")
45-
46-
47-
def _get_cache_key(source_lang: str, target_lang: str, example_type: str) -> str:
48-
"""Generate cache key for a language pair and type."""
49-
return f"{source_lang.lower()}:{target_lang.lower()}:{example_type}"
16+
# Static English example for placeholder preservation
17+
STATIC_PLACEHOLDER_EXAMPLE = {
18+
"source": f"This is {TAG0}important{TAG1} text.",
19+
"correct": f"This is {TAG0}important{TAG1} text.",
20+
"wrong": "This is important text."
21+
}
5022

5123

5224
def get_cached_technical_example(
@@ -55,127 +27,26 @@ def get_cached_technical_example(
5527
example_type: str # "placeholder"
5628
) -> Optional[Dict[str, str]]:
5729
"""
58-
Get a cached technical example.
30+
Get the static technical example.
31+
32+
Returns the same English example regardless of language pair when example_type is "placeholder".
5933
6034
Returns:
61-
Dict with "source", "correct", "wrong" or None if not cached.
35+
Dict with "source", "correct", "wrong", or None if example_type is not "placeholder".
6236
"""
63-
cache = _load_cache()
64-
key = _get_cache_key(source_lang, target_lang, example_type)
65-
return cache.get(key)
66-
67-
68-
def save_technical_example(
69-
source_lang: str,
70-
target_lang: str,
71-
example_type: str,
72-
example: Dict[str, str]
73-
) -> None:
74-
"""Save a generated example to the cache."""
75-
cache = _load_cache()
76-
key = _get_cache_key(source_lang, target_lang, example_type)
77-
cache[key] = example
78-
_save_cache(cache)
79-
37+
if example_type == "placeholder":
38+
return STATIC_PLACEHOLDER_EXAMPLE
39+
return None
8040

81-
def _build_placeholder_prompt(source_lang: str, target_lang: str) -> str:
82-
"""Build prompt to generate a placeholder preservation example."""
83-
return f"""Translate this simple sentence from {source_lang} to {target_lang}.
8441

85-
CRITICAL: Keep {TAG0} and {TAG1} EXACTLY as they appear. Do NOT modify them.
86-
87-
Sentence: {PLACEHOLDER_TEMPLATE_EN}
88-
89-
Reply with ONLY the translated sentence, nothing else."""
90-
91-
92-
async def generate_placeholder_example_async(
93-
source_lang: str,
94-
target_lang: str,
95-
provider: Any
96-
) -> Optional[Dict[str, str]]:
42+
def get_placeholder_example() -> Dict[str, str]:
9743
"""
98-
Generate a placeholder preservation example using the LLM.
99-
100-
Args:
101-
source_lang: Source language name
102-
target_lang: Target language name
103-
provider: An LLMProvider instance
44+
Get the static placeholder preservation example.
10445
10546
Returns:
106-
Dict with "source", "correct", "wrong" or None if failed.
47+
Dict with "source", "correct", "wrong" keys.
10748
"""
108-
try:
109-
# Get source sentence if not English
110-
if source_lang.lower() == "english":
111-
source_text = PLACEHOLDER_TEMPLATE_EN
112-
else:
113-
# First get the source sentence in the source language
114-
source_prompt = f'Translate to {source_lang}: "This is important text."\nReply with ONLY the translation.'
115-
source_response = await provider.generate(source_prompt, timeout=30)
116-
if not source_response:
117-
return None
118-
base_source = source_response.strip().strip('"\'')
119-
# Insert tags around "important" equivalent
120-
# For simplicity, just wrap the whole middle section
121-
source_text = f"{TAG0}{base_source}{TAG1}"
122-
123-
# Generate target translation
124-
if target_lang.lower() == "english":
125-
translated = PLACEHOLDER_TEMPLATE_EN
126-
else:
127-
prompt = _build_placeholder_prompt(source_lang, target_lang)
128-
response = await provider.generate(prompt, timeout=30)
129-
if not response:
130-
return None
131-
translated = response.strip().strip('"\'')
132-
133-
# Validate placeholders preserved
134-
if TAG0 not in translated or TAG1 not in translated:
135-
print(f"[WARNING] LLM did not preserve placeholders for {source_lang}->{target_lang}")
136-
return None
137-
138-
# Build wrong example (placeholders removed)
139-
wrong = translated.replace(TAG0, "").replace(TAG1, "")
140-
wrong = " ".join(wrong.split())
141-
142-
example = {
143-
"source": source_text,
144-
"correct": translated,
145-
"wrong": wrong
146-
}
147-
148-
save_technical_example(source_lang, target_lang, "placeholder", example)
149-
return example
150-
151-
except Exception as e:
152-
print(f"[WARNING] Failed to generate placeholder example: {e}")
153-
return None
154-
155-
156-
def generate_placeholder_example_sync(
157-
source_lang: str,
158-
target_lang: str,
159-
provider: Any
160-
) -> Optional[Dict[str, str]]:
161-
"""Synchronous wrapper for placeholder example generation."""
162-
try:
163-
loop = asyncio.get_event_loop()
164-
if loop.is_running():
165-
import concurrent.futures
166-
with concurrent.futures.ThreadPoolExecutor() as executor:
167-
future = executor.submit(
168-
asyncio.run,
169-
generate_placeholder_example_async(source_lang, target_lang, provider)
170-
)
171-
return future.result(timeout=60)
172-
else:
173-
return loop.run_until_complete(
174-
generate_placeholder_example_async(source_lang, target_lang, provider)
175-
)
176-
except Exception as e:
177-
print(f"[WARNING] Sync placeholder generation failed: {e}")
178-
return None
49+
return STATIC_PLACEHOLDER_EXAMPLE
17950

18051

18152
async def ensure_technical_examples_ready(
@@ -185,29 +56,17 @@ async def ensure_technical_examples_ready(
18556
fast_mode: bool = False
18657
) -> bool:
18758
"""
188-
Ensure technical examples exist for the language pair.
59+
Check if technical examples are ready.
18960
190-
Generates missing examples using the LLM if a provider is given.
61+
Always returns True since we use static examples.
19162
19263
Args:
193-
source_lang: Source language name
194-
target_lang: Target language name
195-
provider: Optional LLMProvider instance
64+
source_lang: Source language name (ignored)
65+
target_lang: Target language name (ignored)
66+
provider: Optional LLMProvider instance (ignored)
19667
fast_mode: Ignored (static examples are always available, so there is nothing to skip)
19768
19869
Returns:
199-
True if all required examples exist or were generated.
70+
True always (static examples are always available).
20071
"""
201-
if fast_mode:
202-
# Fast mode doesn't need placeholder examples
203-
return True
204-
205-
# Placeholder examples for standard mode
206-
if not get_cached_technical_example(source_lang, target_lang, "placeholder"):
207-
if provider:
208-
result = await generate_placeholder_example_async(source_lang, target_lang, provider)
209-
return result is not None
210-
else:
211-
return False
212-
21372
return True

src/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@
9696
# Token-based chunking configuration
9797
# When enabled, uses tiktoken to count tokens instead of lines for more consistent chunk sizes
9898
USE_TOKEN_CHUNKING = os.getenv('USE_TOKEN_CHUNKING', 'true').lower() == 'true'
99-
MAX_TOKENS_PER_CHUNK = int(os.getenv('MAX_TOKENS_PER_CHUNK', '800'))
99+
MAX_TOKENS_PER_CHUNK = int(os.getenv('MAX_TOKENS_PER_CHUNK', '450'))
100100
SOFT_LIMIT_RATIO = float(os.getenv('SOFT_LIMIT_RATIO', '0.8'))
101101

102102
# LLM Provider configuration

src/core/llm_providers.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -138,19 +138,23 @@ async def translate_text(self, prompt: str) -> Optional[str]:
138138

139139

140140
class OllamaProvider(LLMProvider):
141-
"""Ollama API provider"""
141+
"""Ollama API provider - uses /api/chat for proper think parameter support"""
142142

143143
def __init__(self, api_endpoint: str = API_ENDPOINT, model: str = DEFAULT_MODEL,
144144
context_window: int = OLLAMA_NUM_CTX, log_callback: Optional[Callable] = None):
145145
super().__init__(model)
146-
self.api_endpoint = api_endpoint
146+
# Convert /api/generate endpoint to /api/chat for proper think support
147+
self.api_endpoint = api_endpoint.replace('/api/generate', '/api/chat')
147148
self.context_window = context_window
148149
self.log_callback = log_callback
149150

150151
async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
151152
system_prompt: Optional[str] = None) -> Optional[str]:
152153
"""
153-
Generate text using Ollama API.
154+
Generate text using Ollama Chat API.
155+
156+
Uses /api/chat instead of /api/generate because the think parameter
157+
only works correctly with the chat API (verified with Ollama 0.13.5).
154158
155159
Args:
156160
prompt: The user prompt (content to translate)
@@ -160,23 +164,26 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
160164
Returns:
161165
Generated text or None if failed
162166
"""
167+
# Build messages array for chat API
168+
messages = []
169+
if system_prompt:
170+
messages.append({"role": "system", "content": system_prompt})
171+
messages.append({"role": "user", "content": prompt})
172+
163173
payload = {
164174
"model": self.model,
165-
"prompt": prompt,
175+
"messages": messages,
166176
"stream": False,
167177
"options": {
168178
"num_ctx": self.context_window,
169179
"truncate": False
170180
},
171-
# Disable thinking/reasoning mode for models like Qwen3
172-
# This prevents the model from generating <think>...</think> blocks
173-
"think": False
181+
# Enable thinking mode so Ollama separates thinking into a dedicated field
182+
# With think:true, the 'content' field is clean and 'thinking' contains reasoning
183+
# With think:false, Qwen3 still outputs reasoning but mixed into 'content'
184+
"think": True
174185
}
175186

176-
# Add system prompt if provided (Ollama supports 'system' field)
177-
if system_prompt:
178-
payload["system"] = system_prompt
179-
180187
client = await self._get_client()
181188
for attempt in range(MAX_TRANSLATION_ATTEMPTS):
182189
try:
@@ -193,7 +200,9 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
193200
f"Tokens: prompt={prompt_tokens}, response={response_tokens}, "
194201
f"total={total_tokens} (num_ctx={self.context_window})")
195202

196-
return response_json.get("response", "")
203+
# Extract content from chat API response format
204+
message = response_json.get("message", {})
205+
return message.get("content", "")
197206

198207
except httpx.TimeoutException:
199208
if attempt < MAX_TRANSLATION_ATTEMPTS - 1:
@@ -239,7 +248,8 @@ async def get_model_context_size(self) -> int:
239248
"""
240249
try:
241250
client = await self._get_client()
242-
show_endpoint = self.api_endpoint.replace('/api/generate', '/api/show')
251+
# Build /api/show endpoint from chat endpoint
252+
show_endpoint = self.api_endpoint.replace('/api/chat', '/api/show').replace('/api/generate', '/api/show')
243253

244254
response = await client.post(
245255
show_endpoint,

0 commit comments

Comments
 (0)