|
7 | 7 | """ |
8 | 8 |
|
9 | 9 | from collections.abc import Sequence |
| 10 | +from typing import Any |
10 | 11 |
|
11 | 12 | from deepagents import create_deep_agent |
12 | 13 | from langchain_core.tools import BaseTool |
|
23 | 24 | from app.agents.new_chat.tools.registry import build_tools_async |
24 | 25 | from app.services.connector_service import ConnectorService |
25 | 26 |
|
# =============================================================================
# Connector Type Mapping
# =============================================================================

# Lookup table from a SearchSourceConnectorType enum value to the searchable
# document/connector type string used by the knowledge_base tool.
#
# Most connectors are searched under their own name, so the table is built as
# an identity mapping; the handful of connectors whose type string differs are
# then overridden explicitly. The dict union keeps each key at the position of
# its first insertion, so iteration order follows the tuple below.
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
    connector: connector
    for connector in (
        "TAVILY_API",
        "SEARXNG_API",
        "LINKUP_API",
        "BAIDU_SEARCH_API",
        "SLACK_CONNECTOR",
        "TEAMS_CONNECTOR",
        "NOTION_CONNECTOR",
        "GITHUB_CONNECTOR",
        "LINEAR_CONNECTOR",
        "DISCORD_CONNECTOR",
        "JIRA_CONNECTOR",
        "CONFLUENCE_CONNECTOR",
        "CLICKUP_CONNECTOR",
        "GOOGLE_CALENDAR_CONNECTOR",
        "GOOGLE_GMAIL_CONNECTOR",
        "GOOGLE_DRIVE_CONNECTOR",
        "AIRTABLE_CONNECTOR",
        "LUMA_CONNECTOR",
        "ELASTICSEARCH_CONNECTOR",
        "WEBCRAWLER_CONNECTOR",
        "BOOKSTACK_CONNECTOR",
        "CIRCLEBACK_CONNECTOR",
        "OBSIDIAN_CONNECTOR",
        # Composio connectors
        "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
        "COMPOSIO_GMAIL_CONNECTOR",
        "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
    )
} | {
    # Connector type differs from the document type it is searched under
    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",
    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",
}

# Document types that are not backed by a SearchSourceConnector but should
# always be searchable:
#   EXTENSION      - browser extension data
#   FILE           - uploaded files
#   NOTE           - user notes
#   YOUTUBE_VIDEO  - YouTube videos
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
    "EXTENSION",
    "FILE",
    "NOTE",
    "YOUTUBE_VIDEO",
]
| 71 | + |
| 72 | + |
def _map_connectors_to_searchable_types(
    connector_types: list[Any] | None,
) -> list[str]:
    """
    Map SearchSourceConnectorType enums to searchable document/connector types.

    The result starts with the always-available document types
    (``_ALWAYS_AVAILABLE_DOC_TYPES``), followed by the searchable type of each
    recognised connector in input order. Duplicates are dropped, keeping the
    first occurrence. Connector types that have no entry in
    ``_CONNECTOR_TYPE_TO_SEARCHABLE`` are skipped.

    Args:
        connector_types: SearchSourceConnectorType enum values, or their plain
            string equivalents. ``None`` or an empty list yields only the
            always-available document types.

    Returns:
        Ordered, de-duplicated list of searchable connector/document type strings.
    """
    # Dict keys are unique and keep insertion order, so a dict serves as an
    # ordered set here (replaces a parallel set + list).
    ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)

    for item in connector_types or ():
        # Objects exposing ``.value`` (e.g. enum members) contribute that value;
        # anything else is stringified.
        raw = item.value if hasattr(item, "value") else str(item)
        searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(raw)
        if searchable:
            # setdefault leaves an existing key in its original position.
            ordered.setdefault(searchable, None)

    return list(ordered)
| 109 | + |
| 110 | + |
26 | 111 | # ============================================================================= |
27 | 112 | # Deep Agent Factory |
28 | 113 | # ============================================================================= |
@@ -116,13 +201,40 @@ async def create_surfsense_deep_agent( |
116 | 201 | additional_tools=[my_custom_tool] |
117 | 202 | ) |
118 | 203 | """ |
| 204 | + # Discover available connectors and document types for this search space |
| 205 | + # This enables dynamic tool docstrings that inform the LLM about what's actually available |
| 206 | + available_connectors: list[str] | None = None |
| 207 | + available_document_types: list[str] | None = None |
| 208 | + |
| 209 | + try: |
| 210 | + # Get enabled search source connectors for this search space |
| 211 | + connector_types = await connector_service.get_available_connectors( |
| 212 | + search_space_id |
| 213 | + ) |
| 214 | + if connector_types: |
| 215 | + # Convert enum values to strings and also include mapped document types |
| 216 | + available_connectors = _map_connectors_to_searchable_types(connector_types) |
| 217 | + |
| 218 | + # Get document types that have at least one document indexed |
| 219 | + available_document_types = await connector_service.get_available_document_types( |
| 220 | + search_space_id |
| 221 | + ) |
| 222 | + except Exception as e: |
| 223 | + # Log but don't fail - fall back to all connectors if discovery fails |
| 224 | + import logging |
| 225 | + |
| 226 | + logging.warning(f"Failed to discover available connectors/document types: {e}") |
| 227 | + |
119 | 228 | # Build dependencies dict for the tools registry |
120 | 229 | dependencies = { |
121 | 230 | "search_space_id": search_space_id, |
122 | 231 | "db_session": db_session, |
123 | 232 | "connector_service": connector_service, |
124 | 233 | "firecrawl_api_key": firecrawl_api_key, |
125 | 234 | "user_id": user_id, # Required for memory tools |
| 235 | + # Dynamic connector/document type discovery for knowledge base tool |
| 236 | + "available_connectors": available_connectors, |
| 237 | + "available_document_types": available_document_types, |
126 | 238 | } |
127 | 239 |
|
128 | 240 | # Build tools using the async registry (includes MCP tools) |
|
0 commit comments