Fix prompt system: static placeholder example, Ollama /api/chat, smaller token chunks

hydropix · hydropix · commit 8cf9db0e52e6 · 2025-12-28T22:52:40.000+01:00
diff --git a/prompts/examples/technical_cache.json b/prompts/examples/technical_cache.json
diff --git a/prompts/examples/technical_generator.py b/prompts/examples/technical_generator.py
@@ -1,52 +1,24 @@
 """
-Dynamic technical example generator for translation prompts.
+Static technical example for translation prompts.
 
-This module generates simple, technical examples on-demand using the LLM.
-These examples demonstrate WHAT to preserve (placeholders) with simple
-sentences that present no translation difficulty.
+This module provides a simple, static English example demonstrating
+placeholder preservation. LLM generation is deliberately avoided to prevent random errors.
 
 For examples showing HOW to translate idiomatically (cultural adaptation,
 avoiding literal translation), see cultural_examples.py.
 """
 
-import asyncio
-import json
-from pathlib import Path
 from typing import Dict, Optional, Any
 
 from .constants import TAG0, TAG1
 
 
-# Cache file location
-CACHE_FILE = Path(__file__).parent / "technical_cache.json"
-
-# Simple source template - easy to translate, focus on technical preservation
-PLACEHOLDER_TEMPLATE_EN = f"This is {TAG0}important{TAG1} text."
-
-
-def _load_cache() -> Dict[str, Dict[str, Any]]:
-    """Load cached examples from file."""
-    if CACHE_FILE.exists():
-        try:
-            with open(CACHE_FILE, "r", encoding="utf-8") as f:
-                return json.load(f)
-        except (json.JSONDecodeError, IOError):
-            return {}
-    return {}
-
-
-def _save_cache(cache: Dict[str, Dict[str, Any]]) -> None:
-    """Save cache to file."""
-    try:
-        with open(CACHE_FILE, "w", encoding="utf-8") as f:
-            json.dump(cache, f, ensure_ascii=False, indent=2)
-    except IOError as e:
-        print(f"[WARNING] Could not save technical cache: {e}")
-
-
-def _get_cache_key(source_lang: str, target_lang: str, example_type: str) -> str:
-    """Generate cache key for a language pair and type."""
-    return f"{source_lang.lower()}:{target_lang.lower()}:{example_type}"
+# Static English example for placeholder preservation
+STATIC_PLACEHOLDER_EXAMPLE = {
+    "source": f"This is {TAG0}important{TAG1} text.",
+    "correct": f"This is {TAG0}important{TAG1} text.",
+    "wrong": "This is important text."
+}
 
 
 def get_cached_technical_example(
@@ -55,127 +27,26 @@ def get_cached_technical_example(
     example_type: str  # "placeholder"
 ) -> Optional[Dict[str, str]]:
     """
-    Get a cached technical example.
+    Get the static technical example.
+
+    Always returns the same English example regardless of language pair.
 
     Returns:
-        Dict with "source", "correct", "wrong" or None if not cached.
+        Dict with "source", "correct", "wrong".
     """
-    cache = _load_cache()
-    key = _get_cache_key(source_lang, target_lang, example_type)
-    return cache.get(key)
-
-
-def save_technical_example(
-    source_lang: str,
-    target_lang: str,
-    example_type: str,
-    example: Dict[str, str]
-) -> None:
-    """Save a generated example to the cache."""
-    cache = _load_cache()
-    key = _get_cache_key(source_lang, target_lang, example_type)
-    cache[key] = example
-    _save_cache(cache)
-
+    if example_type == "placeholder":
+        return STATIC_PLACEHOLDER_EXAMPLE
+    return None
 
-def _build_placeholder_prompt(source_lang: str, target_lang: str) -> str:
-    """Build prompt to generate a placeholder preservation example."""
-    return f"""Translate this simple sentence from {source_lang} to {target_lang}.
 
-CRITICAL: Keep {TAG0} and {TAG1} EXACTLY as they appear. Do NOT modify them.
-
-Sentence: {PLACEHOLDER_TEMPLATE_EN}
-
-Reply with ONLY the translated sentence, nothing else."""
-
-
-async def generate_placeholder_example_async(
-    source_lang: str,
-    target_lang: str,
-    provider: Any
-) -> Optional[Dict[str, str]]:
+def get_placeholder_example() -> Dict[str, str]:
     """
-    Generate a placeholder preservation example using the LLM.
-
-    Args:
-        source_lang: Source language name
-        target_lang: Target language name
-        provider: An LLMProvider instance
+    Get the static placeholder preservation example.
 
     Returns:
-        Dict with "source", "correct", "wrong" or None if failed.
+        Dict with "source", "correct", "wrong" keys.
     """
-    try:
-        # Get source sentence if not English
-        if source_lang.lower() == "english":
-            source_text = PLACEHOLDER_TEMPLATE_EN
-        else:
-            # First get the source sentence in the source language
-            source_prompt = f'Translate to {source_lang}: "This is important text."\nReply with ONLY the translation.'
-            source_response = await provider.generate(source_prompt, timeout=30)
-            if not source_response:
-                return None
-            base_source = source_response.strip().strip('"\'')
-            # Insert tags around "important" equivalent
-            # For simplicity, just wrap the whole middle section
-            source_text = f"{TAG0}{base_source}{TAG1}"
-
-        # Generate target translation
-        if target_lang.lower() == "english":
-            translated = PLACEHOLDER_TEMPLATE_EN
-        else:
-            prompt = _build_placeholder_prompt(source_lang, target_lang)
-            response = await provider.generate(prompt, timeout=30)
-            if not response:
-                return None
-            translated = response.strip().strip('"\'')
-
-        # Validate placeholders preserved
-        if TAG0 not in translated or TAG1 not in translated:
-            print(f"[WARNING] LLM did not preserve placeholders for {source_lang}->{target_lang}")
-            return None
-
-        # Build wrong example (placeholders removed)
-        wrong = translated.replace(TAG0, "").replace(TAG1, "")
-        wrong = " ".join(wrong.split())
-
-        example = {
-            "source": source_text,
-            "correct": translated,
-            "wrong": wrong
-        }
-
-        save_technical_example(source_lang, target_lang, "placeholder", example)
-        return example
-
-    except Exception as e:
-        print(f"[WARNING] Failed to generate placeholder example: {e}")
-        return None
-
-
-def generate_placeholder_example_sync(
-    source_lang: str,
-    target_lang: str,
-    provider: Any
-) -> Optional[Dict[str, str]]:
-    """Synchronous wrapper for placeholder example generation."""
-    try:
-        loop = asyncio.get_event_loop()
-        if loop.is_running():
-            import concurrent.futures
-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(
-                    asyncio.run,
-                    generate_placeholder_example_async(source_lang, target_lang, provider)
-                )
-                return future.result(timeout=60)
-        else:
-            return loop.run_until_complete(
-                generate_placeholder_example_async(source_lang, target_lang, provider)
-            )
-    except Exception as e:
-        print(f"[WARNING] Sync placeholder generation failed: {e}")
-        return None
+    return STATIC_PLACEHOLDER_EXAMPLE
 
 
 async def ensure_technical_examples_ready(
@@ -185,29 +56,17 @@ async def ensure_technical_examples_ready(
     fast_mode: bool = False
 ) -> bool:
     """
-    Ensure technical examples exist for the language pair.
+    Check if technical examples are ready.
 
-    Generates missing examples using the LLM if a provider is given.
+    Always returns True since we use static examples.
 
     Args:
-        source_lang: Source language name
-        target_lang: Target language name
-        provider: Optional LLMProvider instance
+        source_lang: Source language name (ignored)
+        target_lang: Target language name (ignored)
+        provider: Optional LLMProvider instance (ignored)
         fast_mode: If True, skips placeholder examples (not needed)
 
     Returns:
-        True if all required examples exist or were generated.
+        True always (static examples are always available).
     """
-    if fast_mode:
-        # Fast mode doesn't need placeholder examples
-        return True
-
-    # Placeholder examples for standard mode
-    if not get_cached_technical_example(source_lang, target_lang, "placeholder"):
-        if provider:
-            result = await generate_placeholder_example_async(source_lang, target_lang, provider)
-            return result is not None
-        else:
-            return False
-
     return True
diff --git a/src/config.py b/src/config.py
@@ -96,7 +96,7 @@
 # Token-based chunking configuration
 # When enabled, uses tiktoken to count tokens instead of lines for more consistent chunk sizes
 USE_TOKEN_CHUNKING = os.getenv('USE_TOKEN_CHUNKING', 'true').lower() == 'true'
-MAX_TOKENS_PER_CHUNK = int(os.getenv('MAX_TOKENS_PER_CHUNK', '800'))
+MAX_TOKENS_PER_CHUNK = int(os.getenv('MAX_TOKENS_PER_CHUNK', '450'))
 SOFT_LIMIT_RATIO = float(os.getenv('SOFT_LIMIT_RATIO', '0.8'))
 
 # LLM Provider configuration
diff --git a/src/core/llm_providers.py b/src/core/llm_providers.py
@@ -138,19 +138,23 @@ async def translate_text(self, prompt: str) -> Optional[str]:
 
 
 class OllamaProvider(LLMProvider):
-    """Ollama API provider"""
+    """Ollama API provider - uses /api/chat for proper think parameter support"""
 
     def __init__(self, api_endpoint: str = API_ENDPOINT, model: str = DEFAULT_MODEL,
                  context_window: int = OLLAMA_NUM_CTX, log_callback: Optional[Callable] = None):
         super().__init__(model)
-        self.api_endpoint = api_endpoint
+        # Convert /api/generate endpoint to /api/chat for proper think support
+        self.api_endpoint = api_endpoint.replace('/api/generate', '/api/chat')
         self.context_window = context_window
         self.log_callback = log_callback
 
     async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
                       system_prompt: Optional[str] = None) -> Optional[str]:
         """
-        Generate text using Ollama API.
+        Generate text using Ollama Chat API.
+
+        Uses /api/chat instead of /api/generate because the think parameter
+        only works correctly with the chat API (verified with Ollama 0.13.5).
 
         Args:
             prompt: The user prompt (content to translate)
@@ -160,23 +164,26 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
         Returns:
             Generated text or None if failed
         """
+        # Build messages array for chat API
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
+
         payload = {
             "model": self.model,
-            "prompt": prompt,
+            "messages": messages,
             "stream": False,
             "options": {
                 "num_ctx": self.context_window,
                 "truncate": False
             },
-            # Disable thinking/reasoning mode for models like Qwen3
-            # This prevents the model from generating <think>...</think> blocks
-            "think": False
+            # Enable thinking mode so Ollama separates thinking into a dedicated field
+            # With think:true, the 'content' field is clean and 'thinking' contains reasoning
+            # With think:false, Qwen3 still outputs reasoning but mixed into 'content'
+            "think": True
         }
 
-        # Add system prompt if provided (Ollama supports 'system' field)
-        if system_prompt:
-            payload["system"] = system_prompt
-
         client = await self._get_client()
         for attempt in range(MAX_TRANSLATION_ATTEMPTS):
             try:
@@ -193,7 +200,9 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
                         f"Tokens: prompt={prompt_tokens}, response={response_tokens}, "
                         f"total={total_tokens} (num_ctx={self.context_window})")
 
-                return response_json.get("response", "")
+                # Extract content from chat API response format
+                message = response_json.get("message", {})
+                return message.get("content", "")
 
             except httpx.TimeoutException:
                 if attempt < MAX_TRANSLATION_ATTEMPTS - 1:
@@ -239,7 +248,8 @@ async def get_model_context_size(self) -> int:
         """
         try:
             client = await self._get_client()
-            show_endpoint = self.api_endpoint.replace('/api/generate', '/api/show')
+            # Build /api/show endpoint from chat endpoint
+            show_endpoint = self.api_endpoint.replace('/api/chat', '/api/show').replace('/api/generate', '/api/show')
 
             response = await client.post(
                 show_endpoint,