diff --git a/maestro_backend/ai_researcher/agentic_layer/tools/web_search_tool.py b/maestro_backend/ai_researcher/agentic_layer/tools/web_search_tool.py
index 2e04da9..d2d6cf8 100644
--- a/maestro_backend/ai_researcher/agentic_layer/tools/web_search_tool.py
+++ b/maestro_backend/ai_researcher/agentic_layer/tools/web_search_tool.py
@@ -13,7 +13,8 @@
from ai_researcher.dynamic_config import (
get_web_search_provider, get_tavily_api_key, get_linkup_api_key, get_searxng_base_url, get_searxng_categories,
get_jina_api_key, get_search_depth,
- get_jina_read_full_content, get_jina_fetch_favicons, get_jina_bypass_cache
+ get_jina_read_full_content, get_jina_fetch_favicons, get_jina_bypass_cache,
+ get_yacy_base_url,
)
logger = logging.getLogger(__name__)
@@ -131,6 +132,19 @@ def __init__(self, controller=None):
self.client = api_key # Store the API key as the "client" for Jina
self.api_key_configured = True
logger.info("WebSearchTool initialized with Jina.")
+ elif self.provider == "yacy":
+     # YaCy: only a base URL is looked up for this provider; it is stored
+     # (trailing slashes removed) in a small dict that serves as the "client".
+     if not requests:
+         raise ImportError("YaCy provider selected, but 'requests' library not installed.")
+     yacy_url = get_yacy_base_url()
+     if yacy_url:
+         self.client = {"base_url": yacy_url.rstrip('/')}
+         self.api_key_configured = True
+         logger.info("WebSearchTool initialized with YaCy.")
+     else:
+         # No URL available: mark the tool as unconfigured and stop initialising.
+         logger.warning("YaCy base URL not configured in user settings or environment variables.")
+         self.api_key_configured = False
+         return
else:
raise ValueError(f"Unsupported web search provider configured: {self.provider}")
except Exception as e:
@@ -462,6 +476,93 @@ async def _execute_search(
logger.info(f"Jina search returned no results for query: {search_query}")
# Don't set error_msg here, just return empty results
+ elif self.provider == "yacy":
+     # YaCy search: query the instance's yacysearch.json endpoint and
+     # normalise every hit into a {"title", "snippet", "url"} dict that is
+     # appended to formatted_results.
+     base_url = self.client["base_url"]
+     search_url = f"{base_url}/yacysearch.json"
+
+     # Build YaCy search parameters
+     params = {"query": search_query, "count": max_results, "format": "json"}
+
+     # Optional filters are forwarded only when the caller supplied them.
+     # NOTE(review): confirm that the targeted YaCy version honours
+     # start_date / end_date / site / exclude as request parameters.
+     if from_date:
+         params["start_date"] = from_date
+     if to_date:
+         params["end_date"] = to_date
+     if include_domains:
+         params["site"] = ",".join(include_domains)
+     if exclude_domains:
+         # One "-site:<domain>" term per excluded domain, space separated.
+         params["exclude"] = " ".join(f"-site:{domain}" for domain in exclude_domains)
+
+     async with aiohttp.ClientSession() as session:
+         async with session.get(
+             search_url,
+             params=params,
+             timeout=aiohttp.ClientTimeout(total=30),
+         ) as response:
+             if response.status == 401:
+                 # Reported through error_msg; no payload is parsed in this case.
+                 error_msg = "YaCy returns unauthorized. Check your configuration."
+                 logger.error(error_msg)
+             else:
+                 response.raise_for_status()
+                 search_data = await response.json()
+
+                 # Locate the list of hits inside the decoded payload.
+                 if isinstance(search_data, dict):
+                     # YaCy returns results under channels[0].items
+                     channels = search_data.get("channels", [])
+                     if channels:
+                         first_channel = channels[0]
+                         if isinstance(first_channel, dict):
+                             results = first_channel.get("items", [])
+                         else:
+                             results = []
+                     else:
+                         # Fallback to other layouts: "results", then
+                         # "search", then response["results"].
+                         results = search_data.get("results", [])
+                         if not results:
+                             results = search_data.get("search", [])
+                         if not results:
+                             response_data = search_data.get("response", {})
+                             if isinstance(response_data, dict):
+                                 results = response_data.get("results", [])
+                             else:
+                                 results = []
+                 elif isinstance(search_data, list):
+                     # Direct list format
+                     results = search_data
+                 else:
+                     # Neither a dict nor a list: report it and return no hits.
+                     logger.warning(f"Unexpected YaCy response format: {type(search_data)}")
+                     results = []
+
+                 # A null value under the chosen key is treated as "no hits".
+                 for result in (results or [])[:max_results]:
+                     if not isinstance(result, dict):
+                         # Entries that are not objects cannot be mapped to the
+                         # title/snippet/url fields, so they are skipped.
+                         continue
+                     formatted_results.append({
+                         "title": result.get("title", result.get("name", "No Title")),
+                         "snippet": result.get("description", result.get("content", result.get("snippet", "No Snippet"))),
+                         "url": result.get("url", result.get("link", "#")),
+                     })
+
if error_msg:
return {"error": error_msg}
diff --git a/maestro_backend/ai_researcher/dynamic_config.py b/maestro_backend/ai_researcher/dynamic_config.py
index 14187df..14a3622 100644
--- a/maestro_backend/ai_researcher/dynamic_config.py
+++ b/maestro_backend/ai_researcher/dynamic_config.py
@@ -1,4 +1,5 @@
import os
+#from pathlib import Path
from typing import Dict, Any, Optional
from ai_researcher.user_context import get_user_settings
@@ -222,6 +223,18 @@ def get_searxng_categories(mission_id: Optional[str] = None) -> str:
return os.getenv("SEARXNG_CATEGORIES", "general")
+def get_yacy_base_url(mission_id: Optional[str] = None) -> Optional[str]:
+    """Return the YaCy base URL, preferring user settings over the environment.
+
+    The ``yacy_base_url`` entry of the ``search`` section of the user settings
+    wins when it is present and truthy; otherwise the ``YACY_BASE_URL``
+    environment variable is returned (``None`` when it is unset).
+    ``mission_id`` is accepted but not consulted by this lookup.
+    """
+    settings = get_user_settings()
+    search_section = settings.get("search", {}) if settings else None
+    configured_url = search_section.get("yacy_base_url") if search_section else None
+    if configured_url:
+        return configured_url
+
+    # Nothing usable in the user settings: fall back to the environment.
+    return os.getenv("YACY_BASE_URL")
+
def get_search_depth(mission_id: Optional[str] = None) -> str:
"""Get the search depth (standard/advanced) from user settings or environment."""
# Check user settings first
diff --git a/maestro_backend/api/schemas.py b/maestro_backend/api/schemas.py
index 759ca94..b9071f7 100644
--- a/maestro_backend/api/schemas.py
+++ b/maestro_backend/api/schemas.py
@@ -86,6 +86,7 @@ class SearchSettings(BaseModel):
jina_read_full_content: Optional[bool] = None
jina_fetch_favicons: Optional[bool] = None
jina_bypass_cache: Optional[bool] = None
+ yacy_base_url: Optional[str] = None
class WebFetchSettings(BaseModel):
provider: str = "original" # "original", "jina", or "original_with_jina_fallback"
diff --git a/maestro_frontend/src/features/auth/components/SearchSettingsTab.tsx b/maestro_frontend/src/features/auth/components/SearchSettingsTab.tsx
index c7f4660..f1d06af 100644
--- a/maestro_frontend/src/features/auth/components/SearchSettingsTab.tsx
+++ b/maestro_frontend/src/features/auth/components/SearchSettingsTab.tsx
@@ -28,51 +28,51 @@ const SEARXNG_CATEGORIES = [
export const SearchSettingsTab: React.FC = () => {
const { draftSettings, setDraftSettings } = useSettingsStore()
- const handleProviderChange = (provider: 'tavily' | 'linkup' | 'searxng' | 'jina') => {
+ const handleProviderChange = (provider: 'tavily' | 'linkup' | 'searxng' | 'jina' | 'yacy') => {
if (!draftSettings) return
-
+
const newSearch = {
...draftSettings.search,
provider
}
-
+
setDraftSettings({ search: newSearch })
}
const handleApiKeyChange = (field: string, value: string | boolean | number) => {
if (!draftSettings) return
-
+
const newSearch = {
...draftSettings.search,
[field]: value
}
-
+
setDraftSettings({ search: newSearch })
}
const handleCategoriesChange = (categoryValue: string, checked: boolean) => {
if (!draftSettings) return
-
+
const currentCategories = draftSettings.search.searxng_categories || 'general'
const categoriesArray = currentCategories.split(',').map(c => c.trim()).filter(c => c)
-
+
let newCategoriesArray
if (checked) {
newCategoriesArray = [...categoriesArray.filter(c => c !== categoryValue), categoryValue]
} else {
newCategoriesArray = categoriesArray.filter(c => c !== categoryValue)
}
-
+
// Ensure at least one category is selected
if (newCategoriesArray.length === 0) {
newCategoriesArray = ['general']
}
-
+
const newSearch = {
...draftSettings.search,
searxng_categories: newCategoriesArray.join(',')
}
-
+
setDraftSettings({ search: newSearch })
}
@@ -120,6 +120,7 @@ export const SearchSettingsTab: React.FC = () => {
Get your API key from{' '}
-
@@ -172,9 +173,9 @@ export const SearchSettingsTab: React.FC = () => {
Get your API key from{' '}
-
@@ -202,9 +203,9 @@ export const SearchSettingsTab: React.FC = () => {
Get your API key from{' '}
-
@@ -283,9 +284,9 @@ export const SearchSettingsTab: React.FC = () => {
Enter the URL of your SearXNG instance. You can use a public instance or{' '}
-
@@ -296,6 +297,28 @@ export const SearchSettingsTab: React.FC = () => {
+ Custom YaCy instance search interface.
+
+ Enter the base URL of your YaCy instance. YaCy should have json results enabled.
+
- {draftSettings.search.provider === 'tavily'
+ {draftSettings.search.provider === 'tavily'
? 'Advanced search provides more comprehensive results but costs 2x API credits.'
: 'Deep search uses an agentic workflow for more comprehensive results but takes longer.'}
- Maximum length for search queries (100-400 characters). Queries exceeding this limit will be intelligently refined to preserve search intent.
+ Maximum length for search queries (100-400 characters). Queries exceeding this limit will be intelligently refined to preserve search intent.
Default is 350 to ensure compatibility with most search providers. Tavily has a hard limit of 400 characters.