|
12 | 12 | # See the License for the specific language governing permissions and
|
13 | 13 | # limitations under the License.
|
14 | 14 |
|
| 15 | +import asyncio |
| 16 | +import sys |
15 | 17 | from unittest import mock
|
16 | 18 |
|
17 | 19 | from google.adk.agents.llm_agent import LlmAgent
|
|
21 | 23 | from google.adk.evaluation.base_eval_service import InferenceConfig
|
22 | 24 | from google.adk.evaluation.base_eval_service import InferenceRequest
|
23 | 25 | from google.adk.evaluation.base_eval_service import InferenceResult
|
| 26 | +from google.adk.evaluation.base_eval_service import InferenceStatus |
24 | 27 | from google.adk.evaluation.eval_case import Invocation
|
25 | 28 | from google.adk.evaluation.eval_metrics import EvalMetric
|
26 | 29 | from google.adk.evaluation.eval_metrics import EvalMetricResult
|
@@ -361,3 +364,144 @@ def test_generate_final_eval_status_doesn_t_throw_on(eval_service):
|
361 | 364 | metric_name="metric1", threshold=0.5, eval_status=status
|
362 | 365 | )
|
363 | 366 | eval_service._generate_final_eval_status([eval_metric_result])
|
| 367 | + |
| 368 | + |
@pytest.mark.asyncio
@pytest.mark.skipif(
    sys.version_info < (3, 10), reason="MCP tool requires Python 3.10+"
)
async def test_mcp_stdio_agent_no_runtime_error():
  """Test that LocalEvalService can handle MCP stdio agents without RuntimeError.

  This is a regression test for GitHub issue #2196:
  "RuntimeError: Attempted to exit cancel scope in a different task than it was entered in"

  The fix ensures that Runner.close() is called to properly cleanup MCP connections.
  """
  import tempfile

  from google.adk.evaluation.local_eval_service import LocalEvalService
  from google.adk.tools.mcp_tool.mcp_session_manager import StdioConnectionParams
  from google.adk.tools.mcp_tool.mcp_toolset import MCPToolset
  from mcp import StdioServerParameters

  # Mock LLM responses to avoid real API calls.
  from tests.unittests.testing_utils import MockModel

  mock_responses = [
      genai_types.Content(
          parts=[genai_types.Part(text="Mocked response from test agent")]
      )
  ]
  mock_model = MockModel.create(responses=mock_responses)

  # TemporaryDirectory guarantees cleanup on every exit path, replacing the
  # manual mkdtemp()/shutil.rmtree(..., ignore_errors=True) pairing.
  # ignore_cleanup_errors requires Python 3.10+, which the skipif guarantees.
  with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as test_dir:
    # Create a test agent with an MCP stdio toolset and the mocked model.
    agent = LlmAgent(
        model=mock_model,
        name="test_mcp_agent",
        instruction="Test agent for MCP stdio regression test.",
        tools=[
            MCPToolset(
                connection_params=StdioConnectionParams(
                    server_params=StdioServerParameters(
                        command="npx",
                        args=[
                            "-y",
                            "@modelcontextprotocol/server-filesystem",
                            test_dir,
                        ],
                    ),
                    timeout=5,
                ),
                tool_filter=["read_file", "list_directory"],
            )
        ],
    )

    # Create a mock eval sets manager that returns a single eval case.
    mock_eval_sets_manager = mock.create_autospec(EvalSetsManager)
    test_eval_case = EvalCase(
        eval_id="test_mcp_case",
        conversation=[
            Invocation(
                user_content=genai_types.Content(
                    parts=[genai_types.Part(text="List directory contents")]
                ),
                expected_response="",
            )
        ],
    )
    mock_eval_sets_manager.get_eval_case.return_value = test_eval_case
    eval_set = EvalSet(
        eval_set_id="test_set",
        eval_cases=[test_eval_case],
    )
    mock_eval_sets_manager.get_eval_set.return_value = eval_set

    # Create LocalEvalService with the MCP agent.
    eval_service = LocalEvalService(
        root_agent=agent,
        eval_sets_manager=mock_eval_sets_manager,
    )

    # An inference request that actually triggers
    # _generate_inferences_from_root_agent, the code path containing the fix.
    inference_request = InferenceRequest(
        app_name="test_app",
        eval_set_id="test_set",
        inference_config=InferenceConfig(parallelism=1),
    )

    # Note: In Python 3.10 and 3.11, asyncio.CancelledError may surface during
    # cleanup due to anyio cancel scope context violations when MCP toolsets
    # are cleaned up via asyncio.wait_for() in different task contexts.
    # Python 3.12+ enhanced task context management (Task.get_context(),
    # improved context propagation) resolves this.
    try:
      results = []
      async for result in eval_service.perform_inference(inference_request):
        results.append(result)
        # One result is enough: the mocked LLM makes the first inference
        # deterministic, and the regression fires during setup/cleanup.
        break

      # Reaching here without the cancel-scope RuntimeError means the fix is
      # working; the mocked model should have produced at least one result.
      assert len(results) >= 1

    except RuntimeError as e:
      # The specific cancel-scope RuntimeError is the regression under test;
      # other RuntimeErrors (e.g. a missing npx binary) are acceptable here.
      if "cancel scope" in str(e) and "different task" in str(e):
        pytest.fail(f"MCP stdio RuntimeError regression detected: {e}")
    except asyncio.CancelledError as e:
      # On Python 3.10/3.11, anyio cancel scope context violations may
      # manifest as CancelledError when MCP RequestResponder.__exit__() is
      # called in a different task than __enter__(). Exceptions always carry
      # .args, so a truthiness check replaces the redundant hasattr/len guard.
      if e.args and "cancel scope" in str(e.args[0]):
        pytest.fail(f"MCP stdio cancel scope error regression detected: {e}")
      # Re-raise unrelated cancellations.
      raise
    except Exception as e:
      # Any other exception type carrying the cancel-scope message also counts
      # as the regression; everything else is acceptable for this test.
      if "cancel scope" in str(e) and "different task" in str(e):
        pytest.fail(f"MCP stdio RuntimeError regression detected: {e}")
0 commit comments