99
1010# REMOVED top-level import: import google.generativeai as genai
1111
# Anti-repetition and table formatting suffix added to all system prompts.
# Appended verbatim to the caller-supplied system prompt before the model is
# built (see stream_chat), so every Gemini request carries these rules.
# Based on: https://ai.google.dev/gemini-api/docs/troubleshooting
# NOTE: this string is part of runtime behavior (it is sent to the API);
# do not reflow or "fix" its wording casually.
GEMINI_OUTPUT_RULES = """

CRITICAL OUTPUT RULES:
- Be concise and direct
- Never repeat yourself
- Never duplicate content horizontally or vertically
- Provide each answer only once
- If you find yourself repeating, STOP and move on

MARKDOWN TABLES - CRITICAL:
- Use EXACTLY 3 hyphens per column: |---|---|---|
- NO extra spaces or padding for visual alignment
- NO tabs - only single spaces
- FOR TABLE HEADINGS, IMMEDIATELY ADD ' |' AFTER THE HEADING
- Keep cell content short (under 30 chars)
"""
class GeminiProvider :
    """Google Gemini API provider.

    Wraps ``google.generativeai`` (imported lazily elsewhere — see the
    removed top-level import above) and streams chat responses via
    ``stream_chat``. Every request's system prompt is suffixed with
    ``GEMINI_OUTPUT_RULES``.
    """
@@ -232,10 +251,20 @@ async def stream_chat(
232251 # 2. Initialize Model (with System Instruction)
233252 # We create a specific instance for this chat to support dynamic system prompt
234253 # This is lightweight and ensures we use the native system_instruction
254+
255+ # Add anti-repetition instructions
256+ full_system_prompt = (system_prompt or "" ) + GEMINI_OUTPUT_RULES
257+
258+ # CRITICAL: Temperature MUST be 1.0 for Gemini 2.5+ to prevent looping
259+ safe_temperature = 1.0
260+ if temperature != 1.0 :
261+ # We silently enforce 1.0 for stability as per DeepMind docs
262+ safe_temperature = 1.0
263+
235264 model = self ._genai .GenerativeModel (
236265 self .model_name ,
237266 tools = tools if tools else None ,
238- system_instruction = system_prompt
267+ system_instruction = full_system_prompt
239268 )
240269
241270 # 3. Prepare History
@@ -257,16 +286,35 @@ def _send():
257286 last_user_msg ,
258287 generation_config = {
259288 'max_output_tokens' : max_tokens ,
260- 'temperature' : temperature ,
289+ 'temperature' : safe_temperature ,
261290 },
262291 stream = True
263292 )
264293
265294 loop = asyncio .get_event_loop ()
266295 response = await loop .run_in_executor (None , _send )
267296
297+ # FIX: Convert iterable response to iterator
298+ response_iterator = iter (response )
299+
268300 # 6. Stream Response
269- for chunk in response :
301+ # We iterate manually to avoid blocking the event loop
302+ def _next_chunk ():
303+ try :
304+ return next (response_iterator )
305+ except StopIteration :
306+ return None
307+ except Exception as e :
308+ return e
309+
310+ while True :
311+ chunk = await loop .run_in_executor (None , _next_chunk )
312+
313+ if chunk is None :
314+ break
315+ if isinstance (chunk , Exception ):
316+ raise chunk
317+
270318 try :
271319 # Handle Code Execution Parts
272320 if hasattr (chunk , 'parts' ):
@@ -286,6 +334,7 @@ def _send():
286334 continue
287335
288336 await asyncio .sleep (0 )
337+
289338
290339 except Exception as e :
291340 logger .error (f"Gemini streaming error: { e } " )
0 commit comments