|
19 | 19 | from optillm.plugins.memory_plugin import run as memory_run |
20 | 20 |
|
21 | 21 |
|
def clean_reasoning_tags(text: str) -> str:
    """
    Remove reasoning tags from model responses for clean final output.

    Removes common reasoning tag pairs (case-insensitive), including their
    contents:
    - <think></think>
    - <thinking></thinking>
    - <reasoning></reasoning>
    - <thought></thought>
    - <reflect></reflect>
    - <reflection></reflection>

    Args:
        text: Raw model response text

    Returns:
        Cleaned text with reasoning tags removed
    """
    if not text:
        return text

    # Reasoning tag patterns to remove. Non-greedy bodies so that multiple
    # separate tag pairs in one response are each removed individually.
    reasoning_patterns = [
        r'<think>.*?</think>',
        r'<thinking>.*?</thinking>',
        r'<reasoning>.*?</reasoning>',
        r'<thought>.*?</thought>',
        r'<reflect>.*?</reflect>',
        r'<reflection>.*?</reflection>',
    ]

    cleaned_text = text
    for pattern in reasoning_patterns:
        # DOTALL: reasoning bodies usually span newlines.
        # IGNORECASE: models emit inconsistent tag casing (<THINK>, <Think>, ...).
        cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.DOTALL | re.IGNORECASE)

    # Collapse the whitespace holes left by removed tags: 3+ consecutive
    # newlines become a single blank line.
    cleaned_text = re.sub(r'\n\s*\n\s*\n+', '\n\n', cleaned_text)
    # Collapse runs of spaces only when they follow a non-whitespace character,
    # i.e. mid-line. Leading spaces are deliberately left intact so markdown
    # formatting (indented code blocks, nested list items) survives cleaning.
    cleaned_text = re.sub(r'(?<=\S)  +', ' ', cleaned_text)
    cleaned_text = cleaned_text.strip()

    return cleaned_text
| 62 | + |
| 63 | + |
22 | 64 | class DeepResearcher: |
23 | 65 | """ |
24 | 66 | Implementation of Test-Time Diffusion Deep Researcher (TTD-DR) algorithm |
@@ -77,6 +119,8 @@ def decompose_query(self, system_prompt: str, initial_query: str) -> List[str]: |
77 | 119 | ) |
78 | 120 |
|
79 | 121 | content = response.choices[0].message.content.strip() |
| 122 | + # Clean reasoning tags from query decomposition response |
| 123 | + content = clean_reasoning_tags(content) |
80 | 124 | self.total_tokens += response.usage.completion_tokens |
81 | 125 |
|
82 | 126 | # Extract numbered queries |
@@ -217,6 +261,8 @@ def synthesize_with_memory(self, system_prompt: str, query: str, content: str, s |
217 | 261 |
|
218 | 262 | try: |
219 | 263 | synthesis, tokens = memory_run(system_prompt, memory_input, self.client, self.model) |
| 264 | + # Clean reasoning tags from synthesis response |
| 265 | + synthesis = clean_reasoning_tags(synthesis) |
220 | 266 | return synthesis, tokens |
221 | 267 | except Exception as e: |
222 | 268 | return f"Memory synthesis failed: {str(e)}", 0 |
@@ -254,6 +300,8 @@ def evaluate_completeness(self, system_prompt: str, query: str, current_synthesi |
254 | 300 | ) |
255 | 301 |
|
256 | 302 | content = response.choices[0].message.content.strip() |
| 303 | + # Clean reasoning tags from completeness evaluation response |
| 304 | + content = clean_reasoning_tags(content) |
257 | 305 | self.total_tokens += response.usage.completion_tokens |
258 | 306 |
|
259 | 307 | # Parse response |
@@ -352,6 +400,8 @@ def generate_structured_report(self, system_prompt: str, original_query: str, sy |
352 | 400 | ) |
353 | 401 |
|
354 | 402 | report_content = response.choices[0].message.content.strip() |
| 403 | + # Clean reasoning tags from final report response |
| 404 | + report_content = clean_reasoning_tags(report_content) |
355 | 405 | self.total_tokens += response.usage.completion_tokens |
356 | 406 |
|
357 | 407 | # Add references section with proper formatting |
|
0 commit comments