Python: Emit partial result for magentic pattern when retrieving final result, if available (#12656)

moonbox3 · web-flow · commit c15c885f8457 · 2025-07-02T15:18:27.000Z
### Motivation and Context When using the magentic orchestration pattern, if the `max_round_count` is hit, the final result shows as: ``` Final result: Max round count reached. ``` The interim messages do show as part of the `agent_response_callback`; however, not everyone may have that configured. We should return more meaningful results, even if partial. This PR updates the orchestration to return a partial result when one exists. When one calls: ```python value = await orchestration_result.get() ``` for the `step5_magentic.py` sample, with `max_round_count=1` they should receive the partial result: ``` Final result: Based on the available data, here is a comparison of the estimated training and inference energy consumption for ResNet-50, BERT-base, and GPT-2, along with the associated CO₂ emissions when training on an Azure Standard_NC6s_v3 VM for 24 hours. **Model Architectures and Datasets:** - **ResNet-50**: Image classification model trained on ImageNet. - **BERT-base**: Text classification model fine-tuned on the GLUE benchmark. - **GPT-2**: Text generation model trained on WebText. ... <rest omitted for brevity> ... ```  ### Description Return partial results for magentic orchestration if they exist. - Closes #12625  ### Contribution Checklist  - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone 😄
diff --git a/python/semantic_kernel/agents/orchestration/magentic.py b/python/semantic_kernel/agents/orchestration/magentic.py
@@ -647,19 +647,32 @@ async def _check_within_limits(self) -> bool:
         if self._context is None:
             raise RuntimeError("The Magentic manager is not started yet. Make sure to send a start message first.")
 
-        if (
+        hit_round_limit = (
             self._manager.max_round_count is not None and self._context.round_count >= self._manager.max_round_count
-        ) or (self._manager.max_reset_count is not None and self._context.reset_count > self._manager.max_reset_count):
-            message = (
-                "Max round count reached."
-                if self._manager.max_round_count and self._context.round_count >= self._manager.max_round_count
-                else "Max reset count reached."
+        )
+        hit_reset_limit = (
+            self._manager.max_reset_count is not None and self._context.reset_count > self._manager.max_reset_count
+        )
+
+        if hit_round_limit or hit_reset_limit:
+            limit_type = "round" if hit_round_limit else "reset"
+            logger.debug(f"Max {limit_type} count reached.")
+
+            # Retrieve the latest assistant content produced so far
+            partial_result = next(
+                (m for m in reversed(self._context.chat_history.messages) if m.role == AuthorRole.ASSISTANT),
+                None,
             )
-            logger.debug(message)
-            if self._result_callback:
-                await self._result_callback(
-                    ChatMessageContent(role=AuthorRole.ASSISTANT, content=message, name=self.__class__.__name__)
+            if partial_result is None:
+                partial_result = ChatMessageContent(
+                    role=AuthorRole.ASSISTANT,
+                    content=f"Stopped because the maximum {limit_type} limit was reached. No partial result available.",
+                    name=self.__class__.__name__,
                 )
+
+            if self._result_callback:
+                await self._result_callback(partial_result)
+
             return False
 
         return True
diff --git a/python/tests/unit/agents/orchestration/test_magentic.py b/python/tests/unit/agents/orchestration/test_magentic.py
@@ -425,7 +425,8 @@ async def test_invoke_with_max_round_count_exceeded():
         finally:
             await runtime.stop_when_idle()
 
-        assert result.content == "Max round count reached."
+        # Partial result will be returned when max round count is exceeded.
+        assert result.content == mock_get_chat_message_content.return_value.content
         assert mock_invoke_stream.call_count == 1
         # Planning will be called once, so the facts and plan will be created once.
         assert mock_get_chat_message_content.call_count == 2
@@ -472,7 +473,9 @@ async def test_invoke_with_max_reset_count_exceeded():
         finally:
             await runtime.stop_when_idle()
 
-        assert result.content == "Max reset count reached."
+        # Partial result will be returned when max reset count is exceeded. The test emits content based on the prompt
+        # so check that the content is not None and not an exact match to a mock response.
+        assert result.content is not None
         assert mock_invoke_stream.call_count == 1
         # Planning and replanning will be each called once, so the facts and plan will be created twice.
         assert mock_get_chat_message_content.call_count == 4
diff --git a/python/uv.lock b/python/uv.lock