Bump version to 0.3.0 and fix test suite

victordibia · victordibia · commit baaf974bab55 · 2025-11-10T10:52:31.000-08:00
- Bump version from 0.2.3 to 0.3.0
- Update CHANGELOG.md with new features and improvements from 11 commits
- Fix test mocks to properly use AgentContext matching real Agent behavior
- Update memory tool test for new search and append commands
- Add skipif marker for otel test when opentelemetry not installed
- Remove deprecated execute_agent tests (API now streaming-only)
- All 252 tests now pass (1 skipped)
diff --git a/picoagents/CHANGELOG.md b/picoagents/CHANGELOG.md
@@ -5,6 +5,43 @@ All notable changes to PicoAgents will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.0] - 2025-11-10
+
+### Added
+
+- Model Context Protocol (MCP) integration with complete client implementation
+  - MCP client, configuration, and transport layers
+  - MCP tool wrapper for seamless integration with PicoAgents tools
+  - Examples and comprehensive test coverage
+- Software Engineering (SWE) agent implementation with full documentation
+- Comprehensive evaluation system added to the evaluation framework
+  - Expected answer generation utilities
+  - Results tracking and visualization
+  - Updated composite and LLM judges
+- YouTube caption tool for extracting transcripts
+- List memory example demonstrating memory management patterns
+- Context inspector component in Web UI for debugging agent context
+- Message handling and entity execution hooks in Web UI frontend
+- Workflow progress tracking with dedicated test coverage
+- Premium samples collection with documentation
+
+### Changed
+
+- Enhanced research tools with improved capabilities
+- Improved Web UI execution handling and state management
+- Updated agent and orchestration examples with better patterns
+- Refined LLM client implementations (OpenAI and Azure OpenAI) for better error handling
+- Improved workflow runner with enhanced progress reporting
+- Updated evaluation results with new metrics and visualizations
+- Enhanced memory tool with better examples
+
+### Fixed
+
+- Test mocks now properly use AgentContext matching real Agent behavior
+- Web UI frontend dependency updates for security and compatibility
+- Tool initialization and registration improvements
+- Message handling in agent communication
+
 ## [0.2.3] - 2025-10-22
 
 ### Added
diff --git a/picoagents/pyproject.toml b/picoagents/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "picoagents"
-version = "0.2.3"
+version = "0.3.0"
 description = "A minimal multi-agent framework for educational purposes"
 readme = "README.md"
 license = "MIT"
diff --git a/picoagents/tests/test_memory_tool.py b/picoagents/tests/test_memory_tool.py
@@ -213,6 +213,8 @@ async def test_tool_parameters_schema(self, memory_tool):
             "insert",
             "delete",
             "rename",
+            "search",  # Added in 0.3.0
+            "append",  # Added in 0.3.0
         }
 
     @pytest.mark.asyncio
diff --git a/picoagents/tests/test_orchestrator.py b/picoagents/tests/test_orchestrator.py
@@ -9,6 +9,7 @@
 
 from picoagents._cancellation_token import CancellationToken
 from picoagents.agents import BaseAgent
+from picoagents.context import AgentContext
 from picoagents.messages import AssistantMessage, Message, UserMessage
 from picoagents.orchestration import (
     MaxMessageTermination,
@@ -47,23 +48,27 @@ async def run(
         # Simulate agent processing
         await asyncio.sleep(0.01)
 
-        # Create response messages - preserve full context plus new response
+        # Create context with messages (matching real Agent behavior)
+        context = AgentContext()
+
+        # Add input messages to context
         if isinstance(task, list):
-            # Return full context plus our response
-            context_messages = task.copy()
+            for msg in task:
+                context.add_message(msg)
         elif isinstance(task, str):
-            context_messages = [UserMessage(content=task, source="user")]
+            context.add_message(UserMessage(content=task, source="user"))
         else:
-            context_messages = [task]
+            context.add_message(task)
 
+        # Add assistant response
         assistant_message = AssistantMessage(
             content=self.response_text, source=self.name
         )
-        all_messages = cast(List[Message], context_messages + [assistant_message])
+        context.add_message(assistant_message)
 
         return AgentResponse(
+            context=context,
             source=self.name,
-            messages=all_messages,
             usage=Usage(duration_ms=10, llm_calls=1, tokens_input=10, tokens_output=5),
             finish_reason="stop",
         )
@@ -83,28 +88,33 @@ async def run_stream(
         # Simulate agent processing
         await asyncio.sleep(0.01)
 
-        # Create response messages - preserve full context plus new response
+        # Create context with messages (matching real Agent behavior)
+        context = AgentContext()
+
+        # Add input messages to context and yield them
         if isinstance(task, list):
-            # Return full context plus our response
-            context_messages = task.copy()
+            for msg in task:
+                context.add_message(msg)
+                yield msg
         elif isinstance(task, str):
-            context_messages = [UserMessage(content=task, source="user")]
+            msg = UserMessage(content=task, source="user")
+            context.add_message(msg)
+            yield msg
         else:
-            context_messages = [task]
+            context.add_message(task)
+            yield task
 
+        # Add and yield assistant response
         assistant_message = AssistantMessage(
             content=self.response_text, source=self.name
         )
-        all_messages = cast(List[Message], context_messages + [assistant_message])
-
-        # Yield all messages
-        for message in all_messages:
-            yield message
+        context.add_message(assistant_message)
+        yield assistant_message
 
-        # Yield final response
+        # Yield final response with context
         yield AgentResponse(
+            context=context,
             source=self.name,
-            messages=all_messages,
             usage=Usage(duration_ms=10, llm_calls=1, tokens_input=10, tokens_output=5),
             finish_reason="stop",
         )
@@ -313,28 +323,32 @@ async def run(
 
             await asyncio.sleep(0.01)
 
-            # Analyze the context to count how many turns have occurred
+            # Create context with messages (matching real Agent behavior)
+            context = AgentContext()
+
+            # Analyze the task to count how many turns have occurred
+            response_count = 0
             if isinstance(task, str):
                 # Count occurrences of "Iteration" in the context (indicating previous agent responses)
                 response_count = task.count("Iteration")
-                context_messages = [UserMessage(content=task, source="user")]
+                context.add_message(UserMessage(content=task, source="user"))
             elif isinstance(task, list):
                 response_count = len(
                     [msg for msg in task if isinstance(msg, AssistantMessage)]
                 )
-                context_messages = task.copy()
+                for msg in task:
+                    context.add_message(msg)
             else:
-                response_count = 0
-                context_messages = [UserMessage(content=str(task), source="user")]
+                context.add_message(UserMessage(content=str(task), source="user"))
 
             assistant_message = AssistantMessage(
                 content=f"Iteration {response_count + 1}", source=self.name
             )
-            all_messages = cast(List[Message], context_messages + [assistant_message])
+            context.add_message(assistant_message)
 
             return AgentResponse(
+                context=context,
                 source=self.name,
-                messages=all_messages,
                 usage=Usage(duration_ms=10, llm_calls=1),
                 finish_reason="stop",
             )
@@ -351,33 +365,39 @@ async def run_stream(
 
             await asyncio.sleep(0.01)
 
-            # Analyze the context to count how many turns have occurred
+            # Create context with messages (matching real Agent behavior)
+            context = AgentContext()
+
+            # Analyze the task to count how many turns have occurred
+            response_count = 0
             if isinstance(task, str):
                 # Count occurrences of "Iteration" in the context (indicating previous agent responses)
                 response_count = task.count("Iteration")
-                context_messages = [UserMessage(content=task, source="user")]
+                msg = UserMessage(content=task, source="user")
+                context.add_message(msg)
+                yield msg
             elif isinstance(task, list):
                 response_count = len(
                     [msg for msg in task if isinstance(msg, AssistantMessage)]
                 )
-                context_messages = task.copy()
+                for msg in task:
+                    context.add_message(msg)
+                    yield msg
             else:
-                response_count = 0
-                context_messages = [UserMessage(content=str(task), source="user")]
+                msg = UserMessage(content=str(task), source="user")
+                context.add_message(msg)
+                yield msg
 
             assistant_message = AssistantMessage(
                 content=f"Iteration {response_count + 1}", source=self.name
             )
-            all_messages = cast(List[Message], context_messages + [assistant_message])
-
-            # Yield all messages
-            for message in all_messages:
-                yield message
+            context.add_message(assistant_message)
+            yield assistant_message
 
-            # Yield final response
+            # Yield final response with context
             yield AgentResponse(
+                context=context,
                 source=self.name,
-                messages=all_messages,
                 usage=Usage(duration_ms=10, llm_calls=1),
                 finish_reason="stop",
             )
diff --git a/picoagents/tests/test_otel.py b/picoagents/tests/test_otel.py
@@ -11,6 +11,13 @@
 from picoagents._otel import OTelMiddleware, _is_enabled, auto_instrument
 from picoagents.context import AgentContext
 
+# Check if opentelemetry is available
+try:
+    import opentelemetry  # noqa: F401
+    HAS_OPENTELEMETRY = True
+except ImportError:
+    HAS_OPENTELEMETRY = False
+
 
 class TestOTelConfig:
     """Test OpenTelemetry configuration."""
@@ -104,6 +111,7 @@ def test_auto_instrument_patches_agent_when_enabled(self):
 class TestIntegration:
     """Integration tests with mock tracer."""
 
+    @pytest.mark.skipif(not HAS_OPENTELEMETRY, reason="opentelemetry package not installed")
     @pytest.mark.asyncio
     async def test_end_to_end_with_mock_tracer(self):
         """Test full middleware flow with mocked OTel."""
diff --git a/picoagents/tests/test_termination.py b/picoagents/tests/test_termination.py
@@ -11,6 +11,7 @@
 
 # Additional imports for integration tests
 from picoagents.agents import BaseAgent
+from picoagents.context import AgentContext
 from picoagents.messages import AssistantMessage, Message, ToolMessage, UserMessage
 from picoagents.orchestration import RoundRobinOrchestrator
 from picoagents.termination import (
@@ -398,23 +399,28 @@ async def run(
         task: Union[str, UserMessage, List[Message]],
         cancellation_token: Optional[CancellationToken] = None,
     ) -> AgentResponse:
-        """Return only the user message + one new assistant message."""
-        # Always return exactly one new message (no context duplication)
+        """Return response with proper AgentContext."""
+        # Create context with messages (matching real Agent behavior)
+        context = AgentContext()
+
+        # Add input messages to context
         if isinstance(task, list):
-            context_messages = task.copy()
+            for msg in task:
+                context.add_message(msg)
         elif isinstance(task, str):
-            context_messages = [UserMessage(content=task, source="user")]
+            context.add_message(UserMessage(content=task, source="user"))
         else:
-            context_messages = [task]
+            context.add_message(task)
 
+        # Add assistant response
         assistant_message = AssistantMessage(
             content=self.response_text, source=self.name
         )
-        all_messages = context_messages + [assistant_message]
+        context.add_message(assistant_message)
 
         return AgentResponse(
+            context=context,
             source=self.name,
-            messages=all_messages,
             usage=Usage(duration_ms=10, llm_calls=1),
             finish_reason="stop",
         )
@@ -428,15 +434,37 @@ async def run_stream(
     ) -> AsyncGenerator[
         Union[Message, AgentEvent, AgentResponse, ChatCompletionChunk], None
     ]:
-        """Stream the same result as run()."""
-        result = await self.run(task, cancellation_token)
+        """Stream messages with proper AgentContext."""
+        # Create context with messages (matching real Agent behavior)
+        context = AgentContext()
+
+        # Add input messages to context and yield them
+        if isinstance(task, list):
+            for msg in task:
+                context.add_message(msg)
+                yield msg
+        elif isinstance(task, str):
+            msg = UserMessage(content=task, source="user")
+            context.add_message(msg)
+            yield msg
+        else:
+            context.add_message(task)
+            yield task
 
-        # Yield each message individually
-        for message in result.messages:
-            yield message
+        # Add and yield assistant response
+        assistant_message = AssistantMessage(
+            content=self.response_text, source=self.name
+        )
+        context.add_message(assistant_message)
+        yield assistant_message
 
-        # Yield the final result
-        yield result
+        # Yield final response with context
+        yield AgentResponse(
+            context=context,
+            source=self.name,
+            usage=Usage(duration_ms=10, llm_calls=1),
+            finish_reason="stop",
+        )
 
 
 @pytest.mark.asyncio
diff --git a/picoagents/tests/webui/test_execution.py b/picoagents/tests/webui/test_execution.py
@@ -90,33 +90,9 @@ def execution_engine():
     return ExecutionEngine(session_manager)
 
 
-@pytest.mark.asyncio
-async def test_execute_agent(execution_engine):
-    """Test executing an agent (non-streaming)."""
-    agent = MockAgent("TestAgent")
-    messages = [
-        create_chat_message("user", "Hello"),
-    ]
-
-    response = await execution_engine.execute_agent(agent, messages)
-
-    assert isinstance(response, AgentResponse)
-    assert len(response.messages) > 0
-    assert response.usage.duration_ms > 0
-    assert response.source == "TestAgent"
-
-
-@pytest.mark.asyncio
-async def test_execute_agent_with_session_id(execution_engine):
-    """Test executing agent with existing session ID."""
-    agent = MockAgent("TestAgent")
-    messages = [create_chat_message("user", "Hello")]
-    session_id = "existing_session"
-
-    response = await execution_engine.execute_agent(agent, messages, session_id)
-
-    assert isinstance(response, AgentResponse)
-    assert response.source == "TestAgent"
+# NOTE: Non-streaming execute_agent method was removed in 0.3.0
+# The API now only supports streaming execution via execute_agent_stream
+# See test_execute_agent_stream for the replacement functionality
 
 
 @pytest.mark.asyncio

Original file line number	Diff line number	Diff line change
`@@ -213,6 +213,8 @@ async def test_tool_parameters_schema(self, memory_tool):`
`213`	`213`	`"insert",`
`214`	`214`	`"delete",`
`215`	`215`	`"rename",`
	`216`	`+ "search", # Added in 0.3.0`
	`217`	`+ "append", # Added in 0.3.0`
`216`	`218`	`}`
`217`	`219`
`218`	`220`	`@pytest.mark.asyncio`