Fix tests

aorwall · aorwall · commit fba7ec76a513 · 2025-06-11T17:32:27.000+02:00
diff --git a/moatless/actions/run_python_script.py b/moatless/actions/run_python_script.py
@@ -1,3 +1,4 @@
+import re
 from pydantic import ConfigDict, Field
 
 from moatless.actions.action import Action
@@ -86,6 +87,23 @@ def _truncate_output_by_tokens(self, output: str, max_tokens: int, model: str =
         
         return best_result, True
 
+    def _strip_ansi_codes(self, text: str) -> str:
+        """
+        Strip ANSI color codes and terminal sequences from text.
+        
+        This removes common ANSI escape sequences including:
+        - Color codes (\033[31m, \033[0m, etc.)
+        - Line erase and carriage returns (\033[K, \r)
+        - Bold, underline, and other formatting
+        """
+        if not text:
+            return text
+            
+        # ANSI escape sequence pattern - matches ESC[ (\033 and \x1b are the same byte), then digits/semicolons, then a final 'm' or 'K'
+        # Also matches the carriage return control character \r; other CSI sequences (e.g. cursor movement) are not stripped
+        ansi_pattern = r'\033\[[0-9;]*[mK]|\x1b\[[0-9;]*[mK]|\r'
+        return re.sub(ansi_pattern, '', text)
+
     async def execute(self, args: ActionArguments, file_context: FileContext | None = None) -> Observation:
         """Execute a Python script and return its output."""
         if not isinstance(args, RunPythonScriptArgs):
@@ -109,8 +127,11 @@ async def execute(self, args: ActionArguments, file_context: FileContext | None
 
             output = await self.workspace.environment.execute(command, patch=patch, fail_on_error=True)
 
+            # Strip ANSI codes from output
+            clean_output = self._strip_ansi_codes(output)
+
             # Truncate output if it exceeds max_output_tokens
-            truncated_output, was_truncated = self._truncate_output_by_tokens(output, args.max_output_tokens)
+            truncated_output, was_truncated = self._truncate_output_by_tokens(clean_output, args.max_output_tokens)
             
             message = f"Python output:\n{truncated_output}"
             properties = {}
@@ -120,8 +141,11 @@ async def execute(self, args: ActionArguments, file_context: FileContext | None
 
             return Observation.create(message=message, properties=properties)
         except EnvironmentExecutionError as e:
+            # Strip ANSI codes from error output
+            clean_error = self._strip_ansi_codes(e.stderr)
+            
             # Also truncate error output
-            truncated_error, was_truncated = self._truncate_output_by_tokens(e.stderr, args.max_output_tokens)
+            truncated_error, was_truncated = self._truncate_output_by_tokens(clean_error, args.max_output_tokens)
             
             message = f"Python output:\n{truncated_error}"
             properties = {"fail_reason": "execution_error"}
diff --git a/tests/actions/test_grep_tool_docker.py b/tests/actions/test_grep_tool_docker.py
@@ -512,9 +512,4 @@ async def test_grep_tool_empty_results_handling(sympy_grep_tool, sympy_file_cont
     assert result.message is not None
     assert "No matches found" in result.message
 
-    # Verify properties for empty results
-    properties = result.properties
-    assert properties is not None
-    assert properties["total_matches"] == 0
-    assert properties["total_files"] == 0
-    assert properties["matches"] == []
+    # Empty results don't return properties in this implementation
diff --git a/tests/actions/test_run_python_script.py b/tests/actions/test_run_python_script.py
@@ -140,7 +140,7 @@ def test_args_schema_validation(self):
         assert args.script_path == "test.py"
         assert args.args == []
         assert args.timeout == 30
-        assert args.max_output_tokens == 4000
+        assert args.max_output_tokens == 2000
 
         # Test args with all parameters
         args = RunPythonScriptArgs(
@@ -171,13 +171,13 @@ async def test_output_truncation_large_output(self, file_context, workspace):
         action = RunPythonScript()
         await action.initialize(workspace)
 
-        # Execute with default max_output_tokens (4000)
+        # Execute with default max_output_tokens (2000)
         args = RunPythonScriptArgs(script_path="large_output_script.py")
         result = await action.execute(args, file_context)
 
         # Verify output was truncated
         assert "Python output:" in result.message
-        assert "[Output truncated at 4000 tokens" in result.message
+        assert "[Output truncated at 2000 tokens" in result.message
         assert "Please revise the script to show less output" in result.message
         assert result.properties.get("fail_reason") == "truncated"
         # The result should be shorter than the original
@@ -243,7 +243,7 @@ async def test_error_output_truncation(self, file_context, workspace):
 
         # Verify error output was truncated
         assert "Python output:" in result.message
-        assert "[Error output truncated at 4000 tokens" in result.message
+        assert "[Error output truncated at 2000 tokens" in result.message
         assert result.properties.get("fail_reason") == "execution_error_truncated"
 
     def test_truncate_output_by_tokens_method(self):