Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 102 additions & 1 deletion maestro_backend/ai_researcher/agentic_layer/tools/web_search_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from ai_researcher.dynamic_config import (
get_web_search_provider, get_tavily_api_key, get_linkup_api_key, get_searxng_base_url, get_searxng_categories,
get_jina_api_key, get_search_depth,
get_jina_read_full_content, get_jina_fetch_favicons, get_jina_bypass_cache
get_jina_read_full_content, get_jina_fetch_favicons, get_jina_bypass_cache,
get_yacy_base_url,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -131,6 +132,19 @@ def __init__(self, controller=None):
self.client = api_key # Store the API key as the "client" for Jina
self.api_key_configured = True
logger.info("WebSearchTool initialized with Jina.")
elif self.provider == "yacy":
if not requests:
raise ImportError("YaCy provider selected, but 'requests' library not installed.")
base_url = get_yacy_base_url()
if not base_url:
logger.warning("YaCy base URL not configured in user settings or environment variables.")
self.api_key_configured = False
return
self.client = {
"base_url": base_url.rstrip('/'),
}
self.api_key_configured = True
logger.info("WebSearchTool initialized with YaCy.")
else:
raise ValueError(f"Unsupported web search provider configured: {self.provider}")
except Exception as e:
Expand Down Expand Up @@ -462,6 +476,93 @@ async def _execute_search(
logger.info(f"Jina search returned no results for query: {search_query}")
# Don't set error_msg here, just return empty results

elif self.provider == "yacy":
# YaCy search
base_url = self.client["base_url"]

search_url = f"{base_url}/yacysearch.json"

# Build YaCy search parameters
params = {"query": search_query, "count": max_results, "format": "json"}

# Add optional parameters
if from_date:
params["start_date"] = from_date
if to_date:
params["end_date"] = to_date

if include_domains:
# Add domain filter
params["site"] = ",".join(include_domains)

if exclude_domains:
# Add exclusion filter
for domain in exclude_domains:
params["exclude"] = (f"{params.get('exclude', '')} -site:{domain}".strip())

async with aiohttp.ClientSession() as session:
async with session.get(
search_url,
params=params,
timeout=aiohttp.ClientTimeout(total=30),
) as response:
if response.status == 401:
error_msg = f"YaCy returns unauthorized. Check your configuration."
logger.error(error_msg)
else:
response.raise_for_status()
search_data = await response.json()

# Handle YACY response format
if isinstance(search_data, dict):
# YaCy returns results under channels[0].items
channels = search_data.get("channels", [])
if channels and len(channels) > 0:
# Get the first channel's items
first_channel = channels[0]
if isinstance(first_channel, dict):
results = first_channel.get("items", [])
else:
results = []
else:
# Fallback to other formats
results = search_data.get("results", [])
if not results:
# Try alternative field names
results = search_data.get("search", [])
if not results:
# Another common format
response_data = search_data.get("response", {})
if isinstance(response_data, dict):
results = response_data.get("results", [])
else:
results = []

for result in results[:max_results]:
formatted_results.append({
"title": result.get('title', result.get('name', 'No Title')),
"snippet": result.get('description', result.get('content', result.get('snippet', 'No Snippet'))),
"url": result.get("url", result.get("link", "#")),
})
elif isinstance(search_data, list):
# Direct list format
for result in search_data[:max_results]:
formatted_results.append({
"title": result.get("title", result.get("name", "No Title")),
"snippet": result.get("description", result.get("content", result.get("snippet", "No Snippet"))),
"url": result.get("url", result.get("link", "#")),
})

if error_msg:
logger.warning(f"Unexpected YaCy response format: {type(search_data)}")
# Direct list format
for result in search_data[:max_results]:
formatted_results.append({
"title": result.get("title", result.get("name", "No Title")),
"snippet": result.get("description", result.get("content", result.get("snippet", "No Snippet"))),
"url": result.get("url", result.get("link", "#")),
})

if error_msg:
return {"error": error_msg}

Expand Down
13 changes: 13 additions & 0 deletions maestro_backend/ai_researcher/dynamic_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
#from pathlib import Path
from typing import Dict, Any, Optional

from ai_researcher.user_context import get_user_settings
Expand Down Expand Up @@ -222,6 +223,18 @@ def get_searxng_categories(mission_id: Optional[str] = None) -> str:
return os.getenv("SEARXNG_CATEGORIES", "general")


def get_yacy_base_url(mission_id: Optional[str] = None) -> Optional[str]:
    """Return the YaCy base URL, preferring user settings over the environment.

    Args:
        mission_id: Accepted for signature parity with the other getters;
            this function does not read it.

    Returns:
        The truthy ``yacy_base_url`` value found under the ``"search"``
        section of the user settings, otherwise the value of the
        ``YACY_BASE_URL`` environment variable (``None`` when unset).
    """
    settings = get_user_settings()
    search_section = settings.get("search", {}) if settings else None
    configured_url = search_section.get("yacy_base_url") if search_section else None
    if configured_url:
        # A non-empty user-level value always wins over the environment.
        return configured_url

    # No usable user setting: fall back to the process environment.
    return os.getenv("YACY_BASE_URL")

def get_search_depth(mission_id: Optional[str] = None) -> str:
"""Get the search depth (standard/advanced) from user settings or environment."""
# Check user settings first
Expand Down
1 change: 1 addition & 0 deletions maestro_backend/api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class SearchSettings(BaseModel):
jina_read_full_content: Optional[bool] = None
jina_fetch_favicons: Optional[bool] = None
jina_bypass_cache: Optional[bool] = None
yacy_base_url: Optional[str] = None

class WebFetchSettings(BaseModel):
provider: str = "original" # "original", "jina", or "original_with_jina_fallback"
Expand Down
71 changes: 47 additions & 24 deletions maestro_frontend/src/features/auth/components/SearchSettingsTab.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -28,51 +28,51 @@ const SEARXNG_CATEGORIES = [
export const SearchSettingsTab: React.FC = () => {
const { draftSettings, setDraftSettings } = useSettingsStore()

const handleProviderChange = (provider: 'tavily' | 'linkup' | 'searxng' | 'jina') => {
// Switch the active search provider in the draft settings, keeping every
// other search field as it is. Does nothing while no draft is loaded.
const handleProviderChange = (provider: 'tavily' | 'linkup' | 'searxng' | 'jina' | 'yacy') => {
  if (!draftSettings) {
    return
  }

  setDraftSettings({
    search: { ...draftSettings.search, provider },
  })
}

// Overwrite a single search-settings field (named by `field`) in the draft,
// leaving the remaining search fields intact. Does nothing while no draft
// is loaded.
const handleApiKeyChange = (field: string, value: string | boolean | number) => {
  if (!draftSettings) {
    return
  }

  setDraftSettings({
    search: { ...draftSettings.search, [field]: value },
  })
}

// Add or remove one SearXNG category in the draft's comma-separated
// `searxng_categories` string. Does nothing while no draft is loaded.
const handleCategoriesChange = (categoryValue: string, checked: boolean) => {
  if (!draftSettings) {
    return
  }

  // Parse the stored list, defaulting to 'general' and dropping empty entries.
  const selected = (draftSettings.search.searxng_categories || 'general')
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0)

  // Removing the target first means a re-checked category is appended once,
  // at the end, instead of being duplicated.
  const withoutTarget = selected.filter((entry) => entry !== categoryValue)
  const updated = checked ? [...withoutTarget, categoryValue] : withoutTarget

  // Ensure at least one category is always selected.
  const finalCategories = updated.length === 0 ? ['general'] : updated

  setDraftSettings({
    search: {
      ...draftSettings.search,
      searxng_categories: finalCategories.join(','),
    },
  })
}

Expand Down Expand Up @@ -120,6 +120,7 @@ export const SearchSettingsTab: React.FC = () => {
<SelectItem value="linkup">LinkUp</SelectItem>
<SelectItem value="searxng">SearXNG</SelectItem>
<SelectItem value="jina">Jina</SelectItem>
<SelectItem value="yacy">YaCy</SelectItem>
</SelectContent>
</Select>
</div>
Expand All @@ -142,9 +143,9 @@ export const SearchSettingsTab: React.FC = () => {
</div>
<p className="text-xs text-muted-foreground">
Get your API key from{' '}
<a
href="https://app.tavily.com/home"
target="_blank"
<a
href="https://app.tavily.com/home"
target="_blank"
rel="noopener noreferrer"
className="text-blue-600 hover:underline"
>
Expand Down Expand Up @@ -172,9 +173,9 @@ export const SearchSettingsTab: React.FC = () => {
</div>
<p className="text-xs text-muted-foreground">
Get your API key from{' '}
<a
href="https://linkup.com/dashboard"
target="_blank"
<a
href="https://linkup.com/dashboard"
target="_blank"
rel="noopener noreferrer"
className="text-blue-600 hover:underline"
>
Expand Down Expand Up @@ -202,9 +203,9 @@ export const SearchSettingsTab: React.FC = () => {
</div>
<p className="text-xs text-muted-foreground">
Get your API key from{' '}
<a
href="https://jina.ai/reader"
target="_blank"
<a
href="https://jina.ai/reader"
target="_blank"
rel="noopener noreferrer"
className="text-orange-600 hover:underline"
>
Expand Down Expand Up @@ -283,9 +284,9 @@ export const SearchSettingsTab: React.FC = () => {
</div>
<p className="text-xs text-muted-foreground">
Enter the URL of your SearXNG instance. You can use a public instance or{' '}
<a
href="https://docs.searxng.org/"
target="_blank"
<a
href="https://docs.searxng.org/"
target="_blank"
rel="noopener noreferrer"
className="text-purple-600 hover:underline"
>
Expand All @@ -296,6 +297,28 @@ export const SearchSettingsTab: React.FC = () => {
</p>
</div>
)}

{draftSettings.search.provider === 'yacy' && (
<div className="space-y-3 pl-3 border-l-2 border-indigo-200 bg-indigo-50/30 rounded-r-lg p-3">
<p className="text-xs text-muted-foreground-foreground mb-2">
Custom YaCy instance search interface.
</p>
<div className="space-y-1.5">
<Label htmlFor="yacy-base-url" className="text-sm">YaCy Base URL</Label>
<Input
id="yacy-base-url"
type="url"
value={draftSettings.search.yacy_base_url || ''}
onChange={(e) => handleApiKeyChange('yacy_base_url', e.target.value)}
placeholder="https://your-yacy-instance.com"
className="h-8 text-sm"
/>
</div>
<p className="text-xs text-muted-foreground">
Enter the base URL of your YaCy instance. YaCy should have json results enabled.
</p>
</div>
)}
</div>
</CardContent>
</Card>
Expand Down Expand Up @@ -338,7 +361,7 @@ export const SearchSettingsTab: React.FC = () => {
</SelectContent>
</Select>
<p className="text-xs text-muted-foreground">
{draftSettings.search.provider === 'tavily'
{draftSettings.search.provider === 'tavily'
? 'Advanced search provides more comprehensive results but costs 2x API credits.'
: 'Deep search uses an agentic workflow for more comprehensive results but takes longer.'}
</p>
Expand Down Expand Up @@ -477,7 +500,7 @@ export const SearchSettingsTab: React.FC = () => {
<span className="text-sm text-muted-foreground">characters</span>
</div>
<p className="text-xs text-muted-foreground">
Maximum length for search queries (100-400 characters). Queries exceeding this limit will be intelligently refined to preserve search intent.
Maximum length for search queries (100-400 characters). Queries exceeding this limit will be intelligently refined to preserve search intent.
Default is 350 to ensure compatibility with most search providers. Tavily has a hard limit of 400 characters.
</p>
</div>
Expand Down
Loading