
Commit 86ee6e3

seanzhougoogle authored and copybara-github committed
fix: Close runners after running eval
this fixes #2196
PiperOrigin-RevId: 808618368
1 parent bf4ff31 commit 86ee6e3

File tree

3 files changed: +207 −42 lines


src/google/adk/evaluation/evaluation_generator.py

Lines changed: 41 additions & 42 deletions
@@ -170,54 +170,53 @@ async def _generate_inferences_from_root_agent(
     if not artifact_service:
       artifact_service = InMemoryArtifactService()
 
-    runner = Runner(
-        app_name=app_name,
-        agent=root_agent,
-        artifact_service=artifact_service,
-        session_service=session_service,
-        memory_service=memory_service,
-    )
-
     # Reset agent state for each query
     if callable(reset_func):
       reset_func()
 
     response_invocations = []
 
-    for invocation in invocations:
-      final_response = None
-      user_content = invocation.user_content
-      tool_uses = []
-      invocation_id = ""
-
-      async with Aclosing(
-          runner.run_async(
-              user_id=user_id, session_id=session_id, new_message=user_content
-          )
-      ) as agen:
-        async for event in agen:
-          invocation_id = (
-              event.invocation_id if not invocation_id else invocation_id
-          )
-
-          if (
-              event.is_final_response()
-              and event.content
-              and event.content.parts
-          ):
-            final_response = event.content
-          elif event.get_function_calls():
-            for call in event.get_function_calls():
-              tool_uses.append(call)
-
-      response_invocations.append(
-          Invocation(
-              invocation_id=invocation_id,
-              user_content=user_content,
-              final_response=final_response,
-              intermediate_data=IntermediateData(tool_uses=tool_uses),
-          )
-      )
+    async with Runner(
+        app_name=app_name,
+        agent=root_agent,
+        artifact_service=artifact_service,
+        session_service=session_service,
+        memory_service=memory_service,
+    ) as runner:
+      for invocation in invocations:
+        final_response = None
+        user_content = invocation.user_content
+        tool_uses = []
+        invocation_id = ""
+
+        async with Aclosing(
+            runner.run_async(
+                user_id=user_id, session_id=session_id, new_message=user_content
+            )
+        ) as agen:
+          async for event in agen:
+            invocation_id = (
+                event.invocation_id if not invocation_id else invocation_id
+            )
+
+            if (
+                event.is_final_response()
+                and event.content
+                and event.content.parts
+            ):
+              final_response = event.content
+            elif event.get_function_calls():
+              for call in event.get_function_calls():
+                tool_uses.append(call)
+
+        response_invocations.append(
+            Invocation(
+                invocation_id=invocation_id,
+                user_content=user_content,
+                final_response=final_response,
+                intermediate_data=IntermediateData(tool_uses=tool_uses),
+            )
+        )
 
     return response_invocations

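For context, the new async-with form above is roughly equivalent to the try/finally sketch below. This is not part of the commit; it only spells out what Runner.__aexit__ does (it simply awaits close(), as the runners.py diff further down shows):

runner = Runner(
    app_name=app_name,
    agent=root_agent,
    artifact_service=artifact_service,
    session_service=session_service,
    memory_service=memory_service,
)
try:
  # Run inference for each invocation, exactly as in the loop above.
  ...
finally:
  # Guaranteed to run even if inference raises; this is what closes the
  # agent's MCP toolsets and avoids the leaked-runner behavior from #2196.
  await runner.close()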
src/google/adk/runners.py

Lines changed: 22 additions & 0 deletions
@@ -725,13 +725,35 @@ async def _cleanup_toolsets(self, toolsets_to_close: set[BaseToolset]):
         logger.info('Successfully closed toolset: %s', type(toolset).__name__)
       except asyncio.TimeoutError:
         logger.warning('Toolset %s cleanup timed out', type(toolset).__name__)
+      except asyncio.CancelledError as e:
+        # Handle cancel scope issues in Python 3.10 and 3.11 with anyio
+        #
+        # Root cause: MCP library uses anyio.CancelScope() in RequestResponder.__enter__()
+        # and __exit__() methods. When asyncio.wait_for() creates a new task for cleanup,
+        # the cancel scope is entered in one task context but exited in another.
+        #
+        # Python 3.12+ fixes: Enhanced task context management (Task.get_context()),
+        # improved context propagation across task boundaries, and better cancellation
+        # handling prevent the cross-task cancel scope violation.
+        logger.warning(
+            'Toolset %s cleanup cancelled: %s', type(toolset).__name__, e
+        )
       except Exception as e:
         logger.error('Error closing toolset %s: %s', type(toolset).__name__, e)
 
   async def close(self):
     """Closes the runner."""
     await self._cleanup_toolsets(self._collect_toolset(self.agent))
 
+  async def __aenter__(self):
+    """Async context manager entry."""
+    return self
+
+  async def __aexit__(self, exc_type, exc_val, exc_tb):
+    """Async context manager exit."""
+    await self.close()
+    return False  # Don't suppress exceptions from the async with block
+
 
 class InMemoryRunner(Runner):
   """An in-memory Runner for testing and development.

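The comment added above attributes the CancelledError to toolset.close() running inside a task created by asyncio.wait_for(). The standalone sketch below illustrates that effect; it is not code from the commit. On Python 3.10/3.11, wait_for wraps a plain coroutine in its own Task, while 3.12+ reworked wait_for on top of asyncio.timeout, so the printed result can differ by version:

import asyncio


async def report_task():
  # Returns the task this coroutine actually runs in.
  return asyncio.current_task()


async def main():
  caller = asyncio.current_task()
  # Anything entered earlier in `caller` (e.g. an anyio CancelScope inside an
  # MCP session) would be exited from a different task here on 3.10/3.11.
  inner = await asyncio.wait_for(report_task(), timeout=1.0)
  print("same task:", inner is caller)


asyncio.run(main())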
tests/unittests/evaluation/test_local_eval_service.py

Lines changed: 144 additions & 0 deletions
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import asyncio
+import sys
 from unittest import mock
 
 from google.adk.agents.llm_agent import LlmAgent
@@ -21,6 +23,7 @@
 from google.adk.evaluation.base_eval_service import InferenceConfig
 from google.adk.evaluation.base_eval_service import InferenceRequest
 from google.adk.evaluation.base_eval_service import InferenceResult
+from google.adk.evaluation.base_eval_service import InferenceStatus
 from google.adk.evaluation.eval_case import Invocation
 from google.adk.evaluation.eval_metrics import EvalMetric
 from google.adk.evaluation.eval_metrics import EvalMetricResult
@@ -361,3 +364,144 @@ def test_generate_final_eval_status_doesn_t_throw_on(eval_service):
       metric_name="metric1", threshold=0.5, eval_status=status
   )
   eval_service._generate_final_eval_status([eval_metric_result])
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    sys.version_info < (3, 10), reason="MCP tool requires Python 3.10+"
+)
+async def test_mcp_stdio_agent_no_runtime_error():
+  """Test that LocalEvalService can handle MCP stdio agents without RuntimeError.
+
+  This is a regression test for GitHub issue #2196:
+  "RuntimeError: Attempted to exit cancel scope in a different task than it was entered in"
+
+  The fix ensures that Runner.close() is called to properly cleanup MCP connections.
+  """
+  import tempfile
+
+  from google.adk.evaluation.local_eval_service import LocalEvalService
+  from google.adk.tools.mcp_tool.mcp_session_manager import StdioConnectionParams
+  from google.adk.tools.mcp_tool.mcp_toolset import MCPToolset
+  from mcp import StdioServerParameters
+
+  # Mock LLM responses to avoid real API calls
+  from tests.unittests.testing_utils import MockModel
+
+  mock_responses = [
+      genai_types.Content(
+          parts=[genai_types.Part(text="Mocked response from test agent")]
+      )
+  ]
+  mock_model = MockModel.create(responses=mock_responses)
+
+  # Create a test agent with MCP stdio toolset and mocked model
+  test_dir = tempfile.mkdtemp()
+  try:
+    agent = LlmAgent(
+        model=mock_model,
+        name="test_mcp_agent",
+        instruction="Test agent for MCP stdio regression test.",
+        tools=[
+            MCPToolset(
+                connection_params=StdioConnectionParams(
+                    server_params=StdioServerParameters(
+                        command="npx",
+                        args=[
+                            "-y",
+                            "@modelcontextprotocol/server-filesystem",
+                            test_dir,
+                        ],
+                    ),
+                    timeout=5,
+                ),
+                tool_filter=["read_file", "list_directory"],
+            )
+        ],
+    )
+
+    # Create a mock eval sets manager that returns an eval case
+    mock_eval_sets_manager = mock.create_autospec(EvalSetsManager)
+    test_eval_case = EvalCase(
+        eval_id="test_mcp_case",
+        conversation=[
+            Invocation(
+                user_content=genai_types.Content(
+                    parts=[genai_types.Part(text="List directory contents")]
+                ),
+                expected_response="",
+            )
+        ],
+    )
+    mock_eval_sets_manager.get_eval_case.return_value = test_eval_case
+    eval_set = EvalSet(
+        eval_set_id="test_set",
+        eval_cases=[test_eval_case],
+    )
+    mock_eval_sets_manager.get_eval_set.return_value = eval_set
+
+    # Create LocalEvalService with MCP agent
+    eval_service = LocalEvalService(
+        root_agent=agent,
+        eval_sets_manager=mock_eval_sets_manager,
+    )
+
+    # Create inference request to actually trigger the code path with the fix
+    inference_request = InferenceRequest(
+        app_name="test_app",
+        eval_set_id="test_set",
+        inference_config=InferenceConfig(parallelism=1),
+    )
+
+    # The main test: actually call perform_inference which will trigger
+    # _generate_inferences_from_root_agent where the fix is located
+
+    # Note: In Python 3.10 and 3.11, there may be asyncio.CancelledError during cleanup
+    # due to anyio cancel scope context violations when MCP toolsets are cleaned up
+    # via asyncio.wait_for() in different task contexts. Python 3.12+ enhanced task
+    # context management (Task.get_context(), improved context propagation) resolves this.
+
+    try:
+      results = []
+      async for result in eval_service.perform_inference(inference_request):
+        results.append(result)
+        # We should get at least one result since we mocked the LLM
+        break
+
+      # Test passes if we get here without the cancel scope RuntimeError
+      # With mocked model, we should get successful inference results
+      assert len(results) >= 1
+
+    except RuntimeError as e:
+      # If we get a RuntimeError about cancel scope, the fix isn't working
+      if "cancel scope" in str(e) and "different task" in str(e):
+        pytest.fail(f"MCP stdio RuntimeError regression detected: {e}")
+      else:
+        # Other RuntimeErrors might be acceptable
+        pass
+    except asyncio.CancelledError as e:
+      # In Python 3.10 and 3.11, anyio cancel scope context violations may manifest as CancelledError
+      # when MCP RequestResponder.__exit__() is called in a different task than __enter__()
+      if (
+          hasattr(e, "args")
+          and len(e.args) > 0
+          and "cancel scope" in str(e.args[0])
+      ):
+        pytest.fail(f"MCP stdio cancel scope error regression detected: {e}")
+      else:
+        # Re-raise other CancelledErrors
+        raise
+    except Exception as e:
+      # Check if this is the specific cancel scope error we're testing for
+      if "cancel scope" in str(e) and "different task" in str(e):
+        pytest.fail(f"MCP stdio RuntimeError regression detected: {e}")
+      # Other exceptions are acceptable for this test
+
+    # The main goal is to ensure the test completes without the specific
+    # RuntimeError about cancel scopes. If we reach here, the fix is working.
+
+  finally:
+    # Cleanup
+    import shutil
+
+    shutil.rmtree(test_dir, ignore_errors=True)
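To run only this regression test locally, one option is the small driver below. This is a hypothetical invocation sketch, not part of the commit; it assumes pytest and pytest-asyncio are installed and that npx can fetch @modelcontextprotocol/server-filesystem:

import sys

import pytest

if __name__ == "__main__":
  # pytest.main returns an exit code; passing it to sys.exit mirrors the CLI.
  sys.exit(
      pytest.main([
          "tests/unittests/evaluation/test_local_eval_service.py",
          "-k", "test_mcp_stdio_agent_no_runtime_error",
          "-v",
      ])
  )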
