Fix error handling: raise RuntimeError on pipeline failure instead of returning the error text

kantord · kantord · commit 4d5e3793d223 · 2025-11-26T12:08:55.000+01:00
diff --git a/shell_engine.py b/shell_engine.py
@@ -476,7 +476,6 @@ async def execute_pipeline(self, pipeline: list[dict]) -> str:
             return output
 
         except Exception as e:
-            import traceback
-
-            error_details = f"Pipeline execution failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
-            return error_details
+            # Re-raise so MCP layer sets isError=True in the response
+            # This ensures clients properly display/handle the error
+            raise RuntimeError(f"Pipeline execution failed: {str(e)}") from e
diff --git a/tests/test_shell_engine.py b/tests/test_shell_engine.py
@@ -304,7 +304,7 @@ async def test_tool_stage_non_dict_json_upstream(self):
         array_input = '["item1", "item2", "item3"]'
         upstream = iter([array_input])
 
-        result = await engine.tool_stage("test_server", "test_tool", {}, upstream)
+        await engine.tool_stage("test_server", "test_tool", {}, upstream)
 
         # Should add array as 'input' field
         mock_caller.assert_called_once()
@@ -320,7 +320,7 @@ async def test_tool_stage_plain_text_upstream(self):
         text_input = "some plain text data"
         upstream = iter([text_input])
 
-        result = await engine.tool_stage("test_server", "test_tool", {}, upstream)
+        await engine.tool_stage("test_server", "test_tool", {}, upstream)
 
         # Should add text as 'input' field
         mock_caller.assert_called_once()
@@ -335,7 +335,7 @@ async def test_tool_stage_non_dict_json_does_not_override_existing_input(self):
         array_input = '["upstream_data"]'
         upstream = iter([array_input])
 
-        result = await engine.tool_stage(
+        await engine.tool_stage(
             "test_server", "test_tool", {"input": "explicit_input"}, upstream
         )
 
@@ -424,10 +424,10 @@ async def test_execute_pipeline_invalid_command(self):
 
         pipeline = [{"type": "command", "command": "rm", "args": ["-rf", "/"]}]
 
-        result = await engine.execute_pipeline(pipeline)
-
-        assert "Pipeline execution failed" in result
-        assert "not allowed" in result
+        with pytest.raises(
+            RuntimeError, match="Pipeline execution failed.*not allowed"
+        ):
+            await engine.execute_pipeline(pipeline)
 
     async def test_execute_pipeline_missing_command_field(self):
         """Test pipeline with missing command field."""
@@ -438,10 +438,10 @@ async def test_execute_pipeline_missing_command_field(self):
             {"type": "command", "args": ["test"]}  # Missing "command" field
         ]
 
-        result = await engine.execute_pipeline(pipeline)
-
-        assert "Pipeline execution failed" in result
-        assert "missing 'command' field" in result
+        with pytest.raises(
+            RuntimeError, match="Pipeline execution failed.*missing 'command' field"
+        ):
+            await engine.execute_pipeline(pipeline)
 
     async def test_execute_pipeline_missing_tool_name_field(self):
         """Test pipeline with missing tool name field."""
@@ -451,10 +451,10 @@ async def test_execute_pipeline_missing_tool_name_field(self):
         # Missing "name" field
         pipeline = [{"type": "tool", "server": "test_server", "args": {}}]
 
-        result = await engine.execute_pipeline(pipeline)
-
-        assert "Pipeline execution failed" in result
-        assert "missing 'name' field" in result
+        with pytest.raises(
+            RuntimeError, match="Pipeline execution failed.*missing 'name' field"
+        ):
+            await engine.execute_pipeline(pipeline)
 
     async def test_execute_pipeline_missing_tool_server_field(self):
         """Test pipeline with missing tool server field."""
@@ -464,10 +464,10 @@ async def test_execute_pipeline_missing_tool_server_field(self):
         # Missing "server" field
         pipeline = [{"type": "tool", "name": "test_tool", "args": {}}]
 
-        result = await engine.execute_pipeline(pipeline)
-
-        assert "Pipeline execution failed" in result
-        assert "missing 'server' field" in result
+        with pytest.raises(
+            RuntimeError, match="Pipeline execution failed.*missing 'server' field"
+        ):
+            await engine.execute_pipeline(pipeline)
 
     async def test_execute_pipeline_invalid_args_type(self):
         """Test pipeline with invalid args type (not a list)."""
@@ -476,10 +476,10 @@ async def test_execute_pipeline_invalid_args_type(self):
 
         pipeline = [{"type": "command", "command": "grep", "args": "not-a-list"}]
 
-        result = await engine.execute_pipeline(pipeline)
-
-        assert "Pipeline execution failed" in result
-        assert "must be an array" in result
+        with pytest.raises(
+            RuntimeError, match="Pipeline execution failed.*must be an array"
+        ):
+            await engine.execute_pipeline(pipeline)
 
     async def test_execute_pipeline_unknown_stage_type(self):
         """Test pipeline with unknown stage type."""
@@ -488,10 +488,10 @@ async def test_execute_pipeline_unknown_stage_type(self):
 
         pipeline = [{"type": "unknown_type", "data": "test"}]
 
-        result = await engine.execute_pipeline(pipeline)
-
-        assert "Pipeline execution failed" in result
-        assert "Unknown pipeline item type" in result
+        with pytest.raises(
+            RuntimeError, match="Pipeline execution failed.*Unknown pipeline item type"
+        ):
+            await engine.execute_pipeline(pipeline)
 
     async def test_execute_pipeline_empty_pipeline(self):
         """Test pipeline with no stages returns empty string."""
@@ -548,10 +548,10 @@ async def failing_caller(server, tool, args):
 
         pipeline = [{"type": "tool", "name": "test", "server": "test", "args": {}}]
 
-        result = await engine.execute_pipeline(pipeline)
-
-        assert "Pipeline execution failed" in result
-        assert "Tool call failed" in result
+        with pytest.raises(
+            RuntimeError, match="Pipeline execution failed.*Tool call failed"
+        ):
+            await engine.execute_pipeline(pipeline)
 
     async def test_stage_error_includes_stage_number(self):
         """Test that errors include the stage number."""
@@ -568,10 +568,8 @@ async def test_stage_error_includes_stage_number(self):
             {"type": "command", "command": "grep", "args": ["never reached"]},
         ]
 
-        result = await engine.execute_pipeline(pipeline)
-
-        assert "Pipeline execution failed" in result
-        assert "Stage 2" in result
+        with pytest.raises(RuntimeError, match="Pipeline execution failed.*Stage 2"):
+            await engine.execute_pipeline(pipeline)
 
 
 @pytest.mark.asyncio
@@ -640,11 +638,10 @@ async def test_execute_pipeline_command_timeout(self):
         ]
 
         start = time.time()
-        result = await engine.execute_pipeline(pipeline)
+        with pytest.raises(RuntimeError, match="Pipeline execution failed.*timed out"):
+            await engine.execute_pipeline(pipeline)
         elapsed = time.time() - start
 
-        # Should fail with timeout error
-        assert "timeout" in result.lower() or "timed out" in result.lower()
         assert elapsed < 0.5, f"Timeout took too long: {elapsed} seconds"
 
     @pytest.mark.timeout(2)
@@ -666,7 +663,8 @@ async def test_execute_pipeline_for_each_with_timeout(self):
         ]
 
         start = time.time()
-        await engine.execute_pipeline(pipeline)
+        with pytest.raises(RuntimeError, match="Pipeline execution failed.*timed out"):
+            await engine.execute_pipeline(pipeline)
         elapsed = time.time() - start
 
         # Should timeout quickly, not wait 20 seconds (10s × 2 lines)