Skip to content

Commit b87a8af

Browse files
authored
Merge pull request #740 from MODSetter/dev
feat(0.0.12): bumped version, added composio connectors and various fixes
2 parents df6943b + 09162ad commit b87a8af

File tree

77 files changed

+5672
-2277
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+5672
-2277
lines changed

README.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ SurfSense is a highly customizable AI research agent, connected to external sour
2929

3030
# Video
3131

32-
https://github.com/user-attachments/assets/42a29ea1-d4d8-4213-9c69-972b5b806d58
33-
32+
https://github.com/user-attachments/assets/cc0c84d3-1f2f-4f7a-b519-2ecce22310b1
3433

3534

3635
## Podcast Sample
@@ -52,15 +51,18 @@ https://github.com/user-attachments/assets/a0a16566-6967-4374-ac51-9b3e07fbecd7
5251
- Interact in Natural Language and get cited answers.
5352
### 📄 **Cited Answers**
5453
- Get Cited answers just like Perplexity.
54+
### 🧩 **Universal Compatibility**
55+
- Connect virtually any inference provider via the OpenAI spec and LiteLLM.
5556
### 🔔 **Privacy & Local LLM Support**
56-
- Works Flawlessly with Ollama local LLMs.
57+
- Works flawlessly with local LLMs such as vLLM and Ollama.
5758
### 🏠 **Self Hostable**
5859
- Open source and easy to deploy locally.
5960
### 👥 **Team Collaboration with RBAC**
6061
- Role-Based Access Control for Search Spaces
6162
- Invite team members with customizable roles (Owner, Admin, Editor, Viewer)
6263
- Granular permissions for documents, chats, connectors, and settings
6364
- Share knowledge bases securely within your organization
65+
- Team chats update in real time, and you can "chat about the chat" in comment threads
6466
### 🎙️ Podcasts
6567
- Blazingly fast podcast generation agent. (Creates a 3-minute podcast in under 20 seconds.)
6668
- Convert your chat conversations into engaging audio content
@@ -237,6 +239,8 @@ Before self-hosting installation, make sure to complete the [prerequisite setup
237239

238240
### **BackEnd**
239241

242+
- **LiteLLM**: Universal LLM integration supporting 100+ models (OpenAI, Anthropic, Ollama, etc.)
243+
240244
- **FastAPI**: Modern, fast web framework for building APIs with Python
241245

242246
- **PostgreSQL with pgvector**: Database with vector search capabilities for similarity searches
@@ -253,8 +257,6 @@ Before self-hosting installation, make sure to complete the [prerequisite setup
253257

254258
- **LangChain**: Framework for developing AI-powered applications.
255259

256-
- **LiteLLM**: Universal LLM integration supporting 100+ models (OpenAI, Anthropic, Ollama, etc.)
257-
258260
- **Rerankers**: Advanced result ranking for improved search relevance
259261

260262
- **Hybrid Search**: Combines vector similarity and full-text search for optimal results using Reciprocal Rank Fusion (RRF)
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""Add Composio connector types to SearchSourceConnectorType and DocumentType enums
2+
3+
Revision ID: 79
4+
Revises: 78
5+
6+
This migration adds the Composio connector enum values to both:
7+
- searchsourceconnectortype (for connector type tracking)
8+
- documenttype (for document type tracking)
9+
10+
Composio is a managed OAuth integration service that allows connecting
11+
to various third-party services (Google Drive, Gmail, Calendar, etc.)
12+
without requiring separate OAuth app verification.
13+
14+
This migration adds three specific connector types:
15+
- COMPOSIO_GOOGLE_DRIVE_CONNECTOR
16+
- COMPOSIO_GMAIL_CONNECTOR
17+
- COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
18+
"""
19+
20+
from collections.abc import Sequence
21+
22+
from alembic import op
23+
24+
# revision identifiers, used by Alembic.
25+
revision: str = "79"
26+
down_revision: str | None = "78"
27+
branch_labels: str | Sequence[str] | None = None
28+
depends_on: str | Sequence[str] | None = None
29+
30+
# Define the ENUM type names and the new values
31+
CONNECTOR_ENUM = "searchsourceconnectortype"
32+
CONNECTOR_NEW_VALUES = [
33+
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
34+
"COMPOSIO_GMAIL_CONNECTOR",
35+
"COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
36+
]
37+
DOCUMENT_ENUM = "documenttype"
38+
DOCUMENT_NEW_VALUES = [
39+
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
40+
"COMPOSIO_GMAIL_CONNECTOR",
41+
"COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
42+
]
43+
44+
45+
def _add_enum_values_if_missing(enum_name: str, values: list[str]) -> None:
    """Add each label in ``values`` to the PostgreSQL enum ``enum_name`` if absent.

    One ``DO`` block is executed per label. The block looks the label up in
    ``pg_enum`` (joined to ``pg_type`` by type name) and only issues
    ``ALTER TYPE ... ADD VALUE`` when no matching row exists, so running the
    migration again does not fail on labels that are already present.

    Args:
        enum_name: Name of the PostgreSQL enum type to extend.
        values: Enum labels to add, in the order they should be executed.
    """
    # NOTE: enum_name/value are interpolated directly into the SQL text; they
    # must only ever come from the constants defined in this migration.
    for value in values:
        op.execute(
            f"""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM pg_enum e
                    JOIN pg_type t ON e.enumtypid = t.oid
                    WHERE t.typname = '{enum_name}' AND e.enumlabel = '{value}'
                ) THEN
                    ALTER TYPE {enum_name} ADD VALUE '{value}';
                END IF;
            END$$;
            """
        )


def upgrade() -> None:
    """Upgrade schema - add Composio connector types to connector and document enums safely.

    The connector enum is extended first, then the document enum; each label
    is only added when it is not already part of the enum.
    """
    _add_enum_values_if_missing(CONNECTOR_ENUM, CONNECTOR_NEW_VALUES)
    _add_enum_values_if_missing(DOCUMENT_ENUM, DOCUMENT_NEW_VALUES)
80+
81+
82+
def downgrade() -> None:
    """Downgrade schema - intentionally a no-op.

    PostgreSQL does not support removing enum values directly. A real
    downgrade of the Composio connector types would have to:

    1. Delete any rows that use the Composio connector type values
    2. Create replacement enums without the Composio connector types
    3. Alter the affected columns to use the replacement enums
    4. Drop the old enums

    Because that procedure is complex and typically not needed in practice,
    this function deliberately leaves the schema untouched.
    """
    return None

surfsense_backend/app/agents/new_chat/chat_deepagent.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"""
88

99
from collections.abc import Sequence
10+
from typing import Any
1011

1112
from deepagents import create_deep_agent
1213
from langchain_core.tools import BaseTool
@@ -23,6 +24,90 @@
2324
from app.agents.new_chat.tools.registry import build_tools_async
2425
from app.services.connector_service import ConnectorService
2526

27+
# =============================================================================
28+
# Connector Type Mapping
29+
# =============================================================================
30+
31+
# Maps SearchSourceConnectorType enum values to the searchable document/connector types
32+
# used by the knowledge_base tool. Some connectors map to different document types.
33+
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
34+
# Direct mappings (connector type == searchable type)
35+
"TAVILY_API": "TAVILY_API",
36+
"SEARXNG_API": "SEARXNG_API",
37+
"LINKUP_API": "LINKUP_API",
38+
"BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
39+
"SLACK_CONNECTOR": "SLACK_CONNECTOR",
40+
"TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
41+
"NOTION_CONNECTOR": "NOTION_CONNECTOR",
42+
"GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
43+
"LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
44+
"DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
45+
"JIRA_CONNECTOR": "JIRA_CONNECTOR",
46+
"CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
47+
"CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
48+
"GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
49+
"GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
50+
"GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", # Connector type differs from document type
51+
"AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
52+
"LUMA_CONNECTOR": "LUMA_CONNECTOR",
53+
"ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
54+
"WEBCRAWLER_CONNECTOR": "CRAWLED_URL", # Maps to document type
55+
"BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
56+
"CIRCLEBACK_CONNECTOR": "CIRCLEBACK", # Connector type differs from document type
57+
"OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
58+
# Composio connectors
59+
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
60+
"COMPOSIO_GMAIL_CONNECTOR": "COMPOSIO_GMAIL_CONNECTOR",
61+
"COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
62+
}
63+
64+
# Document types that don't come from SearchSourceConnector but should always be searchable
65+
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
66+
"EXTENSION", # Browser extension data
67+
"FILE", # Uploaded files
68+
"NOTE", # User notes
69+
"YOUTUBE_VIDEO", # YouTube videos
70+
]
71+
72+
73+
def _map_connectors_to_searchable_types(
    connector_types: list[Any],
) -> list[str]:
    """
    Translate connector types into the searchable type names used for lookup.

    The result always starts with the entries of ``_ALWAYS_AVAILABLE_DOC_TYPES``
    and is followed by the mapped value of every connector type found in
    ``_CONNECTOR_TYPE_TO_SEARCHABLE``. Connector types without a mapping are
    skipped, and each searchable type appears only once, at the position of
    its first occurrence.

    Args:
        connector_types: Connector types, given either as objects exposing a
            ``value`` attribute (e.g. enum members) or as anything that can be
            converted with ``str()``.

    Returns:
        Ordered, duplicate-free list of searchable connector/document type strings
    """
    # A dict keeps insertion order and unique keys, so it doubles as an ordered set.
    ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)

    for connector in connector_types:
        # Enum-like objects carry the raw name in ``.value``; anything else is stringified.
        key = connector.value if hasattr(connector, "value") else str(connector)
        mapped = _CONNECTOR_TYPE_TO_SEARCHABLE.get(key)
        if not mapped:
            # No searchable counterpart for this connector type.
            continue
        ordered.setdefault(mapped, None)

    return list(ordered)
109+
110+
26111
# =============================================================================
27112
# Deep Agent Factory
28113
# =============================================================================
@@ -116,13 +201,40 @@ async def create_surfsense_deep_agent(
116201
additional_tools=[my_custom_tool]
117202
)
118203
"""
204+
# Discover available connectors and document types for this search space
205+
# This enables dynamic tool docstrings that inform the LLM about what's actually available
206+
available_connectors: list[str] | None = None
207+
available_document_types: list[str] | None = None
208+
209+
try:
210+
# Get enabled search source connectors for this search space
211+
connector_types = await connector_service.get_available_connectors(
212+
search_space_id
213+
)
214+
if connector_types:
215+
# Convert enum values to strings and also include mapped document types
216+
available_connectors = _map_connectors_to_searchable_types(connector_types)
217+
218+
# Get document types that have at least one document indexed
219+
available_document_types = await connector_service.get_available_document_types(
220+
search_space_id
221+
)
222+
except Exception as e:
223+
# Log but don't fail - fall back to all connectors if discovery fails
224+
import logging
225+
226+
logging.warning(f"Failed to discover available connectors/document types: {e}")
227+
119228
# Build dependencies dict for the tools registry
120229
dependencies = {
121230
"search_space_id": search_space_id,
122231
"db_session": db_session,
123232
"connector_service": connector_service,
124233
"firecrawl_api_key": firecrawl_api_key,
125234
"user_id": user_id, # Required for memory tools
235+
# Dynamic connector/document type discovery for knowledge base tool
236+
"available_connectors": available_connectors,
237+
"available_document_types": available_document_types,
126238
}
127239

128240
# Build tools using the async registry (includes MCP tools)

surfsense_backend/app/agents/new_chat/tools/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# Tool factory exports (for direct use)
2020
from .display_image import create_display_image_tool
2121
from .knowledge_base import (
22+
CONNECTOR_DESCRIPTIONS,
2223
create_search_knowledge_base_tool,
2324
format_documents_for_context,
2425
search_knowledge_base_async,
@@ -40,6 +41,8 @@
4041
__all__ = [
4142
# Registry
4243
"BUILTIN_TOOLS",
44+
# Knowledge base utilities
45+
"CONNECTOR_DESCRIPTIONS",
4346
"ToolDefinition",
4447
"build_tools",
4548
# Tool factories
@@ -51,7 +54,6 @@
5154
"create_scrape_webpage_tool",
5255
"create_search_knowledge_base_tool",
5356
"create_search_surfsense_docs_tool",
54-
# Knowledge base utilities
5557
"format_documents_for_context",
5658
"get_all_tool_names",
5759
"get_default_enabled_tools",

0 commit comments

Comments
 (0)