strands-agents
diff --git a/‎.github/workflows/integration-test.yml
Lines changed: 62 additions & 0 deletions b/‎.github/workflows/integration-test.yml
Lines changed: 62 additions & 0 deletions
diff --git a/‎src/strands/agent/agent.py
Lines changed: 2 additions & 6 deletions b/‎src/strands/agent/agent.py
Lines changed: 2 additions & 6 deletions
diff --git a/‎src/strands/agent/conversation_manager/sliding_window_conversation_manager.py
Lines changed: 82 additions & 1 deletion b/‎src/strands/agent/conversation_manager/sliding_window_conversation_manager.py
Lines changed: 82 additions & 1 deletion
diff --git a/‎src/strands/event_loop/error_handler.py
Lines changed: 2 additions & 67 deletions b/‎src/strands/event_loop/error_handler.py
Lines changed: 2 additions & 67 deletions
diff --git a/‎src/strands/event_loop/event_loop.py
Lines changed: 6 additions & 11 deletions b/‎src/strands/event_loop/event_loop.py
Lines changed: 6 additions & 11 deletions
diff --git a/‎src/strands/event_loop/message_processor.py
Lines changed: 1 addition & 58 deletions b/‎src/strands/event_loop/message_processor.py
Lines changed: 1 addition & 58 deletions
diff --git a/‎src/strands/telemetry/tracer.py
Lines changed: 1 addition & 3 deletions b/‎src/strands/telemetry/tracer.py
Lines changed: 1 addition & 3 deletions
@@ -0,0 +1,62 @@
+name: Secure Integration test
+
+on:
+  pull_request_target: # Runs in the base repository's context so secrets are available for fork PRs
+    types: [opened, synchronize, labeled, unlabeled, reopened]
+
+jobs:
+  check-access-and-checkout:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write # Needed to request the OIDC token used to assume the AWS role
+      pull-requests: read
+      contents: read
+    steps:
+      - name: Check PR labels and author # Gate: fails the job before any PR code or credentials are used
+        id: check
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const pr = context.payload.pull_request;
+
+            const labels = pr.labels.map(label => label.name);
+            const hasLabel = labels.includes('approved-for-integ-test')
+            if (hasLabel) {
+              core.info('PR contains label approved-for-integ-test')
+              return
+            }
+
+            const isOwner = pr.author_association === 'OWNER'
+            if (isOwner) {
+              core.info('PR author is an OWNER')
+              return
+            }
+
+            core.setFailed('Pull Request must either have label approved-for-integ-test or be created by an owner')
+      - name: Configure Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.STRANDS_INTEG_TEST_ROLE }}
+          aws-region: us-east-1
+          mask-aws-account-id: true
+      - name: Checkout PR head commit
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }} # Exact PR commit; a branch name would resolve against the base repo, not the fork
+          persist-credentials: false  # Don't persist credentials for subsequent actions
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Install dependencies
+        run: |
+          pip install --no-cache-dir hatch
+      - name: Run integration tests
+        env:
+          AWS_REGION: us-east-1
+          AWS_REGION_NAME: us-east-1 # Needed for LiteLLM
+        id: tests
+        run: |
+          hatch test tests-integ
+      
+    
@@ -108,14 +108,10 @@ def find_normalized_tool_name() -> Optional[str]:
                 # all tools that can be represented with the normalized name
                 if "_" in name:
                     filtered_tools = [
-                        tool_name
-                        for (tool_name, tool) in tool_registry.items()
-                        if tool_name.replace("-", "_") == name
+                        tool_name for (tool_name, tool) in tool_registry.items() if tool_name.replace("-", "_") == name
                     ]
 
-                    if len(filtered_tools) > 1:
-                        raise AttributeError(f"Multiple tools matching '{name}' found: {', '.join(filtered_tools)}")
-
+                    # The registry itself defends against similar names, so we can just take the first match
                     if filtered_tools:
                         return filtered_tools[0]
 
 
@@ -44,14 +44,16 @@ class SlidingWindowConversationManager(ConversationManager):
     invalid window states.
     """
 
-    def __init__(self, window_size: int = 40):
+    def __init__(self, window_size: int = 40, should_truncate_results: bool = True):
         """Initialize the sliding window conversation manager.
 
         Args:
             window_size: Maximum number of messages to keep in the agent's history.
                 Defaults to 40 messages.
+            should_truncate_results: Whether to first try replacing the most recent tool results with a short
+                error message, before trimming messages, when the context has to be reduced.
+                Defaults to True.
         """
         self.window_size = window_size
+        self.should_truncate_results = should_truncate_results
 
     def apply_management(self, agent: "Agent") -> None:
         """Apply the sliding window to the agent's messages array to maintain a manageable history size.
@@ -127,6 +129,19 @@ def reduce_context(self, agent: "Agent", e: Optional[Exception] = None) -> None:
                 converted.
         """
         messages = agent.messages
+
+        # Try to truncate the tool result first
+        last_message_idx_with_tool_results = self._find_last_message_with_tool_results(messages)
+        if last_message_idx_with_tool_results is not None and self.should_truncate_results:
+            logger.debug(
+                "message_index=<%s> | found message with tool results at index", last_message_idx_with_tool_results
+            )
+            results_truncated = self._truncate_tool_results(messages, last_message_idx_with_tool_results)
+            if results_truncated:
+                logger.debug("message_index=<%s> | tool results truncated", last_message_idx_with_tool_results)
+                return
+        
+        # Fall back to trimming the oldest messages when tool results cannot be truncated any further
         # If the number of messages is less than the window_size, then we default to 2, otherwise, trim to window size
         trim_index = 2 if len(messages) <= self.window_size else len(messages) - self.window_size
 
@@ -151,3 +166,69 @@ def reduce_context(self, agent: "Agent", e: Optional[Exception] = None) -> None:
 
         # Overwrite message history
         messages[:] = messages[trim_index:]
+
+    def _truncate_tool_results(self, messages: Messages, msg_idx: int) -> bool:
+        """Truncate tool results in a message to reduce context size.
+
+        Every tool result in the message at ``msg_idx`` has its content replaced with a short placeholder
+        text and its status set to "error". Tool results that already carry the placeholder are skipped
+        individually, so the return value reflects only results that were actually rewritten by this call.
+
+        Args:
+            messages: The conversation message history. The target message is modified in place.
+            msg_idx: Index of the message containing tool results to truncate.
+
+        Returns:
+            True if at least one tool result was rewritten, False otherwise (including when ``msg_idx`` is out
+            of range or every tool result had already been truncated).
+        """
+        if msg_idx >= len(messages) or msg_idx < 0:
+            return False
+
+        message = messages[msg_idx]
+        changes_made = False
+        tool_result_too_large_message = "The tool result was too large!"
+        for content in message.get("content", []):
+            if not (isinstance(content, dict) and "toolResult" in content):
+                continue
+
+            tool_result = content["toolResult"]
+            # First text item of the result, or "" when the result has no text content
+            tool_result_content_text = next(
+                (item["text"] for item in tool_result.get("content", []) if "text" in item),
+                "",
+            )
+            already_truncated = (
+                tool_result.get("status") == "error" and tool_result_content_text == tool_result_too_large_message
+            )
+            if already_truncated:
+                # Skip only this result: returning here would discard changes already made to earlier
+                # results in the same message and leave later results untouched.
+                logger.info("ToolResult has already been updated, skipping overwrite")
+                continue
+
+            # Update status to error with informative message
+            tool_result["status"] = "error"
+            tool_result["content"] = [{"text": tool_result_too_large_message}]
+            changes_made = True
+
+        return changes_made
+
+    def _find_last_message_with_tool_results(self, messages: Messages) -> Optional[int]:
+        """Locate the most recent message that carries at least one tool result.
+
+        The returned index identifies the message whose tool results are candidates for truncation when the
+        context size has to be reduced.
+
+        Args:
+            messages: The conversation message history.
+
+        Returns:
+            Index of the newest message containing a ``toolResult`` content block, or None if there is none.
+        """
+        # Walk from the newest message toward the oldest; the first hit is the answer
+        for position in reversed(range(len(messages))):
+            blocks = messages[position].get("content", [])
+            if any(isinstance(block, dict) and "toolResult" in block for block in blocks):
+                return position
+
+        return None
@@ -6,14 +6,9 @@
 
 import logging
 import time
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, Tuple
 
-from ..telemetry.metrics import EventLoopMetrics
-from ..types.content import Message, Messages
-from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
-from ..types.models import Model
-from ..types.streaming import StopReason
-from .message_processor import find_last_message_with_tool_results, truncate_tool_results
+from ..types.exceptions import ModelThrottledException
 
 logger = logging.getLogger(__name__)
 
@@ -59,63 +54,3 @@ def handle_throttling_error(
 
     callback_handler(force_stop=True, force_stop_reason=str(e))
     return False, current_delay
-
-
-def handle_input_too_long_error(
-    e: ContextWindowOverflowException,
-    messages: Messages,
-    model: Model,
-    system_prompt: Optional[str],
-    tool_config: Any,
-    callback_handler: Any,
-    tool_handler: Any,
-    kwargs: Dict[str, Any],
-) -> Tuple[StopReason, Message, EventLoopMetrics, Any]:
-    """Handle 'Input is too long' errors by truncating tool results.
-
-    When a context window overflow exception occurs (input too long for the model), this function attempts to recover
-    by finding and truncating the most recent tool results in the conversation history. If truncation is successful, the
-    function will make a call to the event loop.
-
-    Args:
-        e: The ContextWindowOverflowException that occurred.
-        messages: The conversation message history.
-        model: Model provider for running inference.
-        system_prompt: System prompt for the model.
-        tool_config: Tool configuration for the conversation.
-        callback_handler: Callback for processing events as they happen.
-        tool_handler: Handler for tool execution.
-        kwargs: Additional arguments for the event loop.
-
-    Returns:
-        The results from the event loop call if successful.
-
-    Raises:
-        ContextWindowOverflowException: If messages cannot be truncated.
-    """
-    from .event_loop import recurse_event_loop  # Import here to avoid circular imports
-
-    # Find the last message with tool results
-    last_message_with_tool_results = find_last_message_with_tool_results(messages)
-
-    # If we found a message with toolResult
-    if last_message_with_tool_results is not None:
-        logger.debug("message_index=<%s> | found message with tool results at index", last_message_with_tool_results)
-
-        # Truncate the tool results in this message
-        truncate_tool_results(messages, last_message_with_tool_results)
-
-        return recurse_event_loop(
-            model=model,
-            system_prompt=system_prompt,
-            messages=messages,
-            tool_config=tool_config,
-            callback_handler=callback_handler,
-            tool_handler=tool_handler,
-            **kwargs,
-        )
-
-    # If we can't handle this error, pass it up
-    callback_handler(force_stop=True, force_stop_reason=str(e))
-    logger.error("an exception occurred in event_loop_cycle | %s", e)
-    raise ContextWindowOverflowException() from e
@@ -22,7 +22,7 @@
 from ..types.models import Model
 from ..types.streaming import Metrics, StopReason
 from ..types.tools import ToolConfig, ToolHandler, ToolResult, ToolUse
-from .error_handler import handle_input_too_long_error, handle_throttling_error
+from .error_handler import handle_throttling_error
 from .message_processor import clean_orphaned_empty_tool_uses
 from .streaming import stream_messages
 
@@ -160,16 +160,7 @@ def event_loop_cycle(
         except ContextWindowOverflowException as e:
             if model_invoke_span:
                 tracer.end_span_with_error(model_invoke_span, str(e), e)
-            return handle_input_too_long_error(
-                e,
-                messages,
-                model,
-                system_prompt,
-                tool_config,
-                callback_handler,
-                tool_handler,
-                kwargs,
-            )
+            raise e
 
         except ModelThrottledException as e:
             if model_invoke_span:
@@ -248,6 +239,10 @@ def event_loop_cycle(
         # Don't invoke the callback_handler or log the exception - we already did it when we
         # raised the exception and we don't need that duplication.
         raise
+    except ContextWindowOverflowException as e:
+        if cycle_span:
+            tracer.end_span_with_error(cycle_span, str(e), e)
+        raise e
     except Exception as e:
         if cycle_span:
             tracer.end_span_with_error(cycle_span, str(e), e)
 
@@ -5,7 +5,7 @@
 """
 
 import logging
-from typing import Dict, Optional, Set, Tuple
+from typing import Dict, Set, Tuple
 
 from ..types.content import Messages
 
@@ -103,60 +103,3 @@ def clean_orphaned_empty_tool_uses(messages: Messages) -> bool:
             logger.warning("failed to fix orphaned tool use | %s", e)
 
     return True
-
-
-def find_last_message_with_tool_results(messages: Messages) -> Optional[int]:
-    """Find the index of the last message containing tool results.
-
-    This is useful for identifying messages that might need to be truncated to reduce context size.
-
-    Args:
-        messages: The conversation message history.
-
-    Returns:
-        Index of the last message with tool results, or None if no such message exists.
-    """
-    # Iterate backwards through all messages (from newest to oldest)
-    for idx in range(len(messages) - 1, -1, -1):
-        # Check if this message has any content with toolResult
-        current_message = messages[idx]
-        has_tool_result = False
-
-        for content in current_message.get("content", []):
-            if isinstance(content, dict) and "toolResult" in content:
-                has_tool_result = True
-                break
-
-        if has_tool_result:
-            return idx
-
-    return None
-
-
-def truncate_tool_results(messages: Messages, msg_idx: int) -> bool:
-    """Truncate tool results in a message to reduce context size.
-
-    When a message contains tool results that are too large for the model's context window, this function replaces the
-    content of those tool results with a simple error message.
-
-    Args:
-        messages: The conversation message history.
-        msg_idx: Index of the message containing tool results to truncate.
-
-    Returns:
-        True if any changes were made to the message, False otherwise.
-    """
-    if msg_idx >= len(messages) or msg_idx < 0:
-        return False
-
-    message = messages[msg_idx]
-    changes_made = False
-
-    for i, content in enumerate(message.get("content", [])):
-        if isinstance(content, dict) and "toolResult" in content:
-            # Update status to error with informative message
-            message["content"][i]["toolResult"]["status"] = "error"
-            message["content"][i]["toolResult"]["content"] = [{"text": "The tool result was too large!"}]
-            changes_made = True
-
-    return changes_made
@@ -13,9 +13,7 @@
 
 from opentelemetry import trace
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
-
-# See https://github.com/open-telemetry/opentelemetry-python/issues/4615 for the type ignore
-from opentelemetry.sdk.resources import Resource  # type: ignore[attr-defined]
+from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter, SimpleSpanProcessor
 from opentelemetry.trace import StatusCode