99
1010# REMOVED top-level import: import google.generativeai as genai
1111
# Anti-repetition and table formatting suffix added to all system prompts.
# Appended verbatim to the caller-supplied system prompt before the model is
# built (see stream_chat), so every Gemini request carries these rules.
# Based on: https://ai.google.dev/gemini-api/docs/troubleshooting
# NOTE: this string is part of runtime behavior (it is sent to the API);
# do not reflow or "fix" its wording casually.
GEMINI_OUTPUT_RULES = """

CRITICAL OUTPUT RULES:
- Be concise and direct
- Never repeat yourself
- Never duplicate content horizontally or vertically
- Provide each answer only once
- If you find yourself repeating, STOP and move on

MARKDOWN TABLES - CRITICAL:
- Use EXACTLY 3 hyphens per column: |---|---|---|
- NO extra spaces or padding for visual alignment
- NO tabs - only single spaces
- FOR TABLE HEADINGS, IMMEDIATELY ADD ' |' AFTER THE HEADING
- Keep cell content short (under 30 chars)
"""
class GeminiProvider :
    """Google Gemini API provider.

    Wraps ``google.generativeai`` (imported lazily elsewhere — see the
    removed top-level import above) and streams chat responses via
    ``stream_chat``. Every request's system prompt is suffixed with
    ``GEMINI_OUTPUT_RULES``.
    """
@@ -232,10 +251,20 @@ async def stream_chat(
232251 # 2. Initialize Model (with System Instruction)
233252 # We create a specific instance for this chat to support dynamic system prompt
234253 # This is lightweight and ensures we use the native system_instruction
254+
255+ # Add anti-repetition instructions
256+ full_system_prompt = (system_prompt or "" ) + GEMINI_OUTPUT_RULES
257+
258+ # CRITICAL: Temperature MUST be 1.0 for Gemini 2.5+ to prevent looping
259+ safe_temperature = 1.0
260+ if temperature != 1.0 :
261+ # We silently enforce 1.0 for stability as per DeepMind docs
262+ safe_temperature = 1.0
263+
235264 model = self ._genai .GenerativeModel (
236265 self .model_name ,
237266 tools = tools if tools else None ,
238- system_instruction = system_prompt
267+ system_instruction = full_system_prompt
239268 )
240269
241270 # 3. Prepare History
@@ -257,16 +286,35 @@ def _send():
257286 last_user_msg ,
258287 generation_config = {
259288 'max_output_tokens' : max_tokens ,
260- 'temperature' : temperature ,
289+ 'temperature' : safe_temperature ,
261290 },
262291 stream = True
263292 )
264293
265294 loop = asyncio .get_event_loop ()
266295 response = await loop .run_in_executor (None , _send )
267296
297+ # FIX: Convert iterable response to iterator
298+ response_iterator = iter (response )
299+
268300 # 6. Stream Response
269- for chunk in response :
301+ # We iterate manually to avoid blocking the event loop
302+ def _next_chunk ():
303+ try :
304+ return next (response_iterator )
305+ except StopIteration :
306+ return None
307+ except Exception as e :
308+ return e
309+
310+ while True :
311+ chunk = await loop .run_in_executor (None , _next_chunk )
312+
313+ if chunk is None :
314+ break
315+ if isinstance (chunk , Exception ):
316+ raise chunk
317+
270318 try :
271319 # Handle Code Execution Parts
272320 if hasattr (chunk , 'parts' ):
@@ -286,6 +334,7 @@ def _send():
286334 continue
287335
288336 await asyncio .sleep (0 )
337+
289338
290339 except Exception as e :
291340 logger .error (f"Gemini streaming error: { e } " )
0 commit comments