Update: [AEA-5873] - direct lambda invocation refactor of the slackbot (#131)

bencegadanyi1-nhs · web-flow · commit cc52c9a02b74 · 2025-11-14T10:54:14.000Z
## Summary

:sparkles: New Feature

### Details

Added direct invocation to the Slack bot lambda function to support
upcoming regression testing.

comes with the following work:
- direct invocation handler
- moves shared logic - created `ai_processor.py` service to avoid code
duplication
- types - there's already a good amount of typing in the codebase for
this lambda function, but started extracting them to a more centralised
location for reusability
- tests
diff --git a/packages/cdk/stacks/EpsAssistMeStack.ts b/packages/cdk/stacks/EpsAssistMeStack.ts
@@ -183,6 +183,11 @@ export class EpsAssistMeStack extends Stack {
       exportName: `${props.stackName}:lambda:SlackBot:ExecutionRole:Arn`
     })
 
+    new CfnOutput(this, "SlackBotLambdaArn", {
+      value: functions.slackBotLambda.function.functionArn,
+      exportName: `${props.stackName}:lambda:SlackBot:Arn`
+    })
+
     if (isPullRequest) {
       new CfnOutput(this, "VERSION_NUMBER", {
         value: props.version,
diff --git a/packages/slackBotFunction/app/core/types.py b/packages/slackBotFunction/app/core/types.py
@@ -0,0 +1,86 @@
+"""
+types for direct lambda invocation - defines contracts for bypassing slack
+
+centralizes all type definitions for direct invocation flow to avoid scattered
+inline type hints across handlers and processors.
+"""
+
+from typing import Any, TypedDict, Literal
+from datetime import datetime, timezone
+
+
+class DirectInvocationRequest(TypedDict, total=False):
+    """payload contract for direct lambda calls - bypasses slack entirely
+
+    declared with total=False, so a type checker treats every key as optional;
+    the runtime requirements on invocation_type and query are enforced by
+    is_valid_direct_request.
+    """
+
+    invocation_type: Literal["direct"]  # marker value that identifies a direct call
+    query: str  # user question; must be a non-blank string to pass validation
+    session_id: str | None  # conversation continuity across calls
+
+
+class DirectInvocationResponseData(TypedDict):
+    """successful ai response payload - matches slack handler output format"""
+
+    text: str  # answer text returned to the caller
+    session_id: str | None  # session to pass back on the next call; may be None
+    # NOTE(review): annotated as [{title: str, uri: str}, ...] - confirm the citation
+    # entries handed to create_success_response really have this flat shape
+    citations: list[dict[str, str]]
+    timestamp: str  # iso8601 timestamp in utc
+
+
+class DirectInvocationErrorData(TypedDict):
+    """error response payload - consistent structure for all failure modes"""
+
+    error: str  # human-readable failure description
+    timestamp: str  # iso8601 timestamp in utc
+
+
+class DirectInvocationResponse(TypedDict):
+    """complete lambda response envelope - includes status code + payload
+
+    built by create_success_response (statusCode 200) and create_error_response
+    (caller-supplied statusCode).
+    """
+
+    statusCode: int  # http-style status code
+    # success payload or error payload, depending on which factory built the envelope
+    response: DirectInvocationResponseData | DirectInvocationErrorData
+
+
+class AIProcessorResponse(TypedDict):
+    """ai processor output - shared between slack and direct invocation"""
+
+    text: str  # answer text extracted from the bedrock response
+    session_id: str | None  # None when the bedrock response carries no session id
+    citations: list[dict[str, str]]  # empty list when the bedrock response has none
+    # TODO: ensure proper typing for bedrock response when refactoring other types in the future
+    kb_response: dict[str, Any]  # raw bedrock data for slack session handling
+
+
+# type guards for runtime validation
+def is_valid_direct_request(event: dict[str, Any]) -> bool:
+    """check that an event is a well-formed direct invocation payload
+
+    valid means: invocation_type is exactly "direct" and query is a string that
+    still has content once surrounding whitespace is stripped.
+    """
+    if event.get("invocation_type") != "direct":
+        return False
+
+    candidate = event.get("query")
+    if not isinstance(candidate, str):
+        # missing key (None) and non-string values are both rejected here
+        return False
+
+    # whitespace-only queries count as empty
+    return bool(candidate.strip())
+
+
+def _utc_timestamp() -> str:
+    """current utc time as an iso8601 string with a literal Z suffix"""
+    # isoformat() renders an aware utc datetime with a "+00:00" offset; the response
+    # payload types document a Z suffix, so the offset is normalised in one place
+    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+
+
+def create_success_response(
+    text: str, session_id: str | None, citations: list[dict[str, str]]
+) -> DirectInvocationResponse:
+    """factory for successful direct invocation responses
+
+    wraps the answer text, session id and citations in a statusCode 200 envelope
+    and stamps it with the current utc time (iso8601, Z suffix).
+    """
+    return {
+        "statusCode": 200,
+        "response": {
+            "text": text,
+            "session_id": session_id,
+            "citations": citations,
+            "timestamp": _utc_timestamp(),
+        },
+    }
+
+
+def create_error_response(status_code: int, error_message: str) -> DirectInvocationResponse:
+    """factory for error responses - ensures consistent timestamp format
+
+    wraps error_message in an envelope carrying the given status_code, stamped with
+    the current utc time in the same format as success responses (iso8601, Z suffix).
+    """
+    return {
+        "statusCode": status_code,
+        "response": {
+            "error": error_message,
+            "timestamp": _utc_timestamp(),
+        },
+    }
diff --git a/packages/slackBotFunction/app/handler.py b/packages/slackBotFunction/app/handler.py
@@ -9,7 +9,15 @@
 from slack_bolt.adapter.aws_lambda import SlackRequestHandler
 from aws_lambda_powertools.utilities.typing import LambdaContext
 
+from typing import Any
+
 from app.core.config import get_logger
+from app.core.types import (
+    DirectInvocationResponse,
+    is_valid_direct_request,
+    create_success_response,
+    create_error_response,
+)
 from app.services.app import get_app
 from app.slack.slack_events import process_pull_request_slack_action, process_pull_request_slack_event
 
@@ -33,6 +41,10 @@ def handler(event: dict, context: LambdaContext) -> dict:
     When subsequent actions or events are processed, this is looked up, and if it exists, then the pull request lambda
     is triggered with either pull_request_event or pull_request_action
     """
+    # direct invocation bypasses slack infrastructure entirely
+    if event.get("invocation_type") == "direct":
+        return handle_direct_invocation(event, context)
+
     app = get_app(logger=logger)
     # handle pull request processing requests
     if event.get("pull_request_event"):
@@ -55,3 +67,28 @@ def handler(event: dict, context: LambdaContext) -> dict:
     # handle Slack webhook requests
     slack_handler = SlackRequestHandler(app=app)
     return slack_handler.handle(event=event, context=context)
+
+
+def handle_direct_invocation(event: dict[str, Any], context: LambdaContext) -> DirectInvocationResponse:
+    """serve a direct (non-slack) lambda invocation and return a status-code envelope
+
+    returns a 400 envelope when the payload fails is_valid_direct_request, a 200
+    envelope carrying text/session_id/citations on success, and a 500 envelope if
+    anything inside the try block raises.
+    """
+    try:
+        if is_valid_direct_request(event):
+            user_query, prior_session = event["query"], event.get("session_id")
+
+            # same AI pipeline the slack handlers use; the import sits inside the try,
+            # so a failure to import is also reported as a 500 envelope
+            from app.services.ai_processor import process_ai_query
+
+            answer = process_ai_query(user_query, prior_session)
+            return create_success_response(
+                text=answer["text"],
+                session_id=answer["session_id"],
+                citations=answer["citations"],
+            )
+
+        # every validation failure is reported with this one message
+        return create_error_response(400, "Missing required field: query")
+    except Exception as e:
+        logger.error(f"Error in direct invocation: {e}")
+        return create_error_response(500, "Internal server error")
diff --git a/packages/slackBotFunction/app/services/ai_processor.py b/packages/slackBotFunction/app/services/ai_processor.py
@@ -0,0 +1,29 @@
+"""
+shared AI processing service - extracted to avoid duplication
+
+both slack handlers and direct invocation use identical logic for query
+reformulation and bedrock interaction. single source of truth for AI flows.
+"""
+
+from app.services.bedrock import query_bedrock
+from app.services.query_reformulator import reformulate_query
+from app.core.config import get_logger
+from app.core.types import AIProcessorResponse
+
+logger = get_logger()
+
+
+def process_ai_query(user_query: str, session_id: str | None = None) -> AIProcessorResponse:
+    """run one user query through reformulation and then bedrock
+
+    shared by the slack message flow and direct invocation. exceptions raised by
+    reformulate_query or query_bedrock are not caught here and reach the caller.
+    """
+    # the knowledge base is searched with the reformulated text, not the raw user input
+    search_query = reformulate_query(user_query)
+
+    # passing the caller's session_id lets bedrock continue an existing conversation
+    raw = query_bedrock(search_query, session_id)
+
+    result: AIProcessorResponse = {
+        "text": raw["output"]["text"],
+        "session_id": raw.get("sessionId"),
+        "citations": raw.get("citations", []),
+        # untouched bedrock payload, kept for slack's session handling
+        "kb_response": raw,
+    }
+    return result
diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py
@@ -16,14 +16,14 @@
     get_bot_token,
     get_logger,
 )
-from app.services.bedrock import query_bedrock
+
 from app.services.dynamo import (
     delete_state_information,
     get_state_information,
     store_state_information,
     update_state_information,
 )
-from app.services.query_reformulator import reformulate_query
+
 from app.services.slack import get_friendly_channel_name, post_error_message
 from app.utils.handler_utils import (
     conversation_key_and_root,
@@ -34,6 +34,9 @@
     strip_mentions,
 )
 
+from app.services.ai_processor import process_ai_query
+
+
 logger = get_logger()
 
 
@@ -321,16 +324,13 @@ def process_slack_message(event: Dict[str, Any], event_id: str, client: WebClien
             client.chat_postMessage(**post_params)
             return
 
-        # Reformulate query for better RAG retrieval
-        reformulated_query = reformulate_query(user_query)
-
-        # Check if we have an existing Bedrock conversation session
+        # conversation continuity: reuse bedrock session across slack messages
         session_data = get_conversation_session_data(conversation_key)
         session_id = session_data.get("session_id") if session_data else None
 
-        # Query Bedrock Knowledge Base with conversation context
-        kb_response = query_bedrock(reformulated_query, session_id)
-        response_text = kb_response["output"]["text"]
+        ai_response = process_ai_query(user_query, session_id)
+        kb_response = ai_response["kb_response"]
+        response_text = ai_response["text"]
 
         # Post the answer (plain) to get message_ts
         post_params = {"channel": channel, "text": response_text}
diff --git a/packages/slackBotFunction/tests/test_ai_processor.py b/packages/slackBotFunction/tests/test_ai_processor.py
@@ -0,0 +1,145 @@
+"""shared ai processor - validates query reformulation and bedrock integration"""
+
+import pytest
+from unittest.mock import patch
+from app.services.ai_processor import process_ai_query
+
+
+class TestAIProcessor:
+    """unit tests for process_ai_query with reformulate_query and query_bedrock mocked
+
+    both collaborators are patched where ai_processor looks them up. stacked @patch
+    decorators are applied bottom-up, so the reformulate mock is always the first
+    mock argument and the bedrock mock the second.
+    """
+
+    @patch("app.services.ai_processor.query_bedrock")
+    @patch("app.services.ai_processor.reformulate_query")
+    def test_process_ai_query_without_session(self, mock_reformulate, mock_bedrock):
+        """new conversation: no session context passed to bedrock"""
+        mock_reformulate.return_value = "reformulated: How to authenticate EPS API?"
+        mock_bedrock.return_value = {
+            "output": {"text": "To authenticate with EPS API, you need..."},
+            "sessionId": "new-session-abc123",
+            "citations": [{"title": "EPS Authentication Guide", "uri": "https://example.com/auth"}],
+        }
+
+        result = process_ai_query("How to authenticate EPS API?")
+
+        assert result["text"] == "To authenticate with EPS API, you need..."
+        assert result["session_id"] == "new-session-abc123"
+        assert len(result["citations"]) == 1
+        assert result["citations"][0]["title"] == "EPS Authentication Guide"
+        assert "kb_response" in result
+
+        # raw query goes to the reformulator; its output (not the raw query) goes to bedrock
+        mock_reformulate.assert_called_once_with("How to authenticate EPS API?")
+        mock_bedrock.assert_called_once_with("reformulated: How to authenticate EPS API?", None)
+
+    @patch("app.services.ai_processor.query_bedrock")
+    @patch("app.services.ai_processor.reformulate_query")
+    def test_process_ai_query_with_session(self, mock_reformulate, mock_bedrock):
+        """conversation continuity: existing session maintained across queries"""
+        mock_reformulate.return_value = "reformulated: What about rate limits?"
+        mock_bedrock.return_value = {
+            "output": {"text": "EPS API has rate limits of..."},
+            "sessionId": "existing-session-456",
+            "citations": [],
+        }
+
+        result = process_ai_query("What about rate limits?", session_id="existing-session-456")
+
+        assert result["text"] == "EPS API has rate limits of..."
+        assert result["session_id"] == "existing-session-456"
+        assert result["citations"] == []
+        assert "kb_response" in result
+
+        # the caller's session id must be forwarded to bedrock unchanged
+        mock_reformulate.assert_called_once_with("What about rate limits?")
+        mock_bedrock.assert_called_once_with("reformulated: What about rate limits?", "existing-session-456")
+
+    @patch("app.services.ai_processor.query_bedrock")
+    @patch("app.services.ai_processor.reformulate_query")
+    def test_process_ai_query_reformulate_error(self, mock_reformulate, mock_bedrock):
+        """no fallback: reformulation failure bubbles up and bedrock is never called"""
+        mock_reformulate.side_effect = Exception("Query reformulation failed")
+
+        with pytest.raises(Exception) as exc_info:
+            process_ai_query("How to authenticate EPS API?")
+
+        assert "Query reformulation failed" in str(exc_info.value)
+        mock_bedrock.assert_not_called()
+
+    @patch("app.services.ai_processor.query_bedrock")
+    @patch("app.services.ai_processor.reformulate_query")
+    def test_process_ai_query_bedrock_error(self, mock_reformulate, mock_bedrock):
+        """bedrock service failure: error propagated to caller"""
+        mock_reformulate.return_value = "reformulated query"
+        mock_bedrock.side_effect = Exception("Bedrock service error")
+
+        with pytest.raises(Exception) as exc_info:
+            process_ai_query("How to authenticate EPS API?")
+
+        assert "Bedrock service error" in str(exc_info.value)
+        # reformulation ran before the bedrock call failed
+        mock_reformulate.assert_called_once()
+
+    @patch("app.services.ai_processor.query_bedrock")
+    @patch("app.services.ai_processor.reformulate_query")
+    def test_process_ai_query_missing_citations(self, mock_reformulate, mock_bedrock):
+        """bedrock response incomplete: citations default to empty list"""
+        mock_reformulate.return_value = "reformulated query"
+        mock_bedrock.return_value = {
+            "output": {"text": "Response without citations"},
+            "sessionId": "session-123",
+            # No citations key
+        }
+
+        result = process_ai_query("test query")
+
+        assert result["text"] == "Response without citations"
+        assert result["session_id"] == "session-123"
+        assert result["citations"] == []  # safe default when bedrock omits citations
+
+    @patch("app.services.ai_processor.query_bedrock")
+    @patch("app.services.ai_processor.reformulate_query")
+    def test_process_ai_query_missing_session_id(self, mock_reformulate, mock_bedrock):
+        """bedrock response incomplete: session_id properly handles None"""
+        mock_reformulate.return_value = "reformulated query"
+        mock_bedrock.return_value = {
+            "output": {"text": "Response without session"},
+            "citations": [],
+            # No sessionId key
+        }
+
+        result = process_ai_query("test query")
+
+        assert result["text"] == "Response without session"
+        assert result["session_id"] is None  # explicit None when bedrock omits sessionId
+        assert result["citations"] == []
+
+    @patch("app.services.ai_processor.query_bedrock")
+    @patch("app.services.ai_processor.reformulate_query")
+    def test_process_ai_query_empty_query(self, mock_reformulate, mock_bedrock):
+        """edge case: empty query still processed through full pipeline"""
+        mock_reformulate.return_value = ""
+        mock_bedrock.return_value = {
+            "output": {"text": "Please provide a question"},
+            "sessionId": "session-empty",
+            "citations": [],
+        }
+
+        result = process_ai_query("")
+
+        # process_ai_query itself does no input validation: the empty string is passed
+        # to both collaborators as-is
+        assert result["text"] == "Please provide a question"
+        mock_reformulate.assert_called_once_with("")
+        mock_bedrock.assert_called_once_with("", None)
+
+    @patch("app.services.ai_processor.query_bedrock")
+    @patch("app.services.ai_processor.reformulate_query")
+    def test_process_ai_query_includes_raw_response(self, mock_reformulate, mock_bedrock):
+        """slack needs raw bedrock data: kb_response preserved for session handling"""
+        mock_reformulate.return_value = "reformulated query"
+        raw_response = {
+            "output": {"text": "Test response"},
+            "sessionId": "test-123",
+            "citations": [{"title": "Test", "uri": "test.com"}],
+            "metadata": {"some": "extra_data"},
+        }
+        mock_bedrock.return_value = raw_response
+
+        result = process_ai_query("test query")
+
+        # keys the processor does not itself read (here "metadata") must survive in kb_response
+        assert result["kb_response"] == raw_response
+        assert result["kb_response"]["metadata"]["some"] == "extra_data"
diff --git a/packages/slackBotFunction/tests/test_direct_invocation.py b/packages/slackBotFunction/tests/test_direct_invocation.py
diff --git a/packages/slackBotFunction/tests/test_handlers.py b/packages/slackBotFunction/tests/test_handlers.py
diff --git a/packages/slackBotFunction/tests/test_slack_events.py b/packages/slackBotFunction/tests/test_slack_events.py