Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions config.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,16 @@ tools:
base_class: path.to.my.tools.CustomTool
my_other_tool:
base_class: "name_of_tool_class_in_registry"
# Search tools: configure Tavily API key and search limits per tool
# Search tools: configure search provider and API keys per tool
# (can be overridden per-agent in tools list)
web_search_tool:
tavily_api_key: "your-tavily-api-key-here" # Tavily API key (get at tavily.com)
tavily_api_base_url: "https://api.tavily.com" # Tavily API URL
engine: "tavily" # Search engine: "tavily" (default), "brave", or "perplexity"
api_key: "your-search-api-key-here" # API key for the selected engine
# api_base_url: "https://custom-url" # Optional, uses engine default
max_results: 12
max_searches: 6
extract_page_content_tool:
tavily_api_key: "your-tavily-api-key-here" # Same Tavily API key
tavily_api_key: "your-tavily-api-key-here" # Tavily API key (Tavily-only feature)
tavily_api_base_url: "https://api.tavily.com"
content_limit: 2000

Expand Down
7 changes: 3 additions & 4 deletions examples/sgr_deep_research/config.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,12 @@ tools:
# Core tools (base_class defaults to sgr_agent_core.tools.*)
# Search tools: configure search provider and API keys per tool
web_search_tool:
tavily_api_key: "your-tavily-api-key-here" # Tavily API key (get at tavily.com)
tavily_api_base_url: "https://api.tavily.com" # Tavily API URL
engine: "tavily" # Search engine: "tavily" (default), "brave", or "perplexity"
api_key: "your-tavily-api-key-here" # API key for the selected engine
max_searches: 4 # Max search operations
max_results: 10 # Max results in search query
extract_page_content_tool:
tavily_api_key: "your-tavily-api-key-here" # Same Tavily API key
tavily_api_base_url: "https://api.tavily.com"
tavily_api_key: "your-tavily-api-key-here" # Tavily API key (Tavily extract only)
content_limit: 1500 # Content char limit per source
create_report_tool:
# base_class defaults to sgr_agent_core.tools.CreateReportTool
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,12 @@ tools:
# Core tools (base_class defaults to sgr_agent_core.tools.*)
# Search tools: configure search provider and API keys per tool
web_search_tool:
tavily_api_key: "your-tavily-api-key-here" # Tavily API key (get at tavily.com)
tavily_api_base_url: "https://api.tavily.com" # Tavily API URL
engine: "tavily" # Search engine: "tavily" (default), "brave", or "perplexity"
api_key: "your-tavily-api-key-here" # API key for the selected engine
max_searches: 4 # Max search operations
max_results: 10 # Max results in search query
extract_page_content_tool:
tavily_api_key: "your-tavily-api-key-here" # Same Tavily API key
tavily_api_base_url: "https://api.tavily.com"
tavily_api_key: "your-tavily-api-key-here" # Tavily API key (Tavily extract only)
content_limit: 1500 # Content char limit per source
final_answer_tool:
# base_class defaults to sgr_agent_core.tools.FinalAnswerTool
Expand Down
16 changes: 8 additions & 8 deletions sgr_agent_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@
SourceData,
)
from sgr_agent_core.next_step_tool import NextStepToolsBuilder, NextStepToolStub
from sgr_agent_core.services import AgentRegistry, MCP2ToolConverter, PromptLoader, ToolRegistry
from sgr_agent_core.services import (
AgentRegistry,
MCP2ToolConverter,
PromptLoader,
ToolRegistry,
)
from sgr_agent_core.tools import * # noqa: F403

__all__ = [
Expand All @@ -50,9 +55,9 @@
"SourceData",
# Services
"AgentRegistry",
"ToolRegistry",
"PromptLoader",
"MCP2ToolConverter",
"PromptLoader",
"ToolRegistry",
# Configuration
"AgentConfig",
"AgentDefinition",
Expand All @@ -64,11 +69,6 @@
# Next step tools
"NextStepToolStub",
"NextStepToolsBuilder",
# Models
"AgentStatesEnum",
"AgentContext",
"SearchResult",
"SourceData",
# Factory
"AgentFactory",
]
5 changes: 2 additions & 3 deletions sgr_agent_core/agent_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,11 @@ def to_openai_client_kwargs(self) -> dict[str, Any]:


class SearchConfig(BaseModel, extra="allow"):
tavily_api_key: str | None = Field(default=None, description="Tavily API key")
tavily_api_base_url: str = Field(default="https://api.tavily.com", description="Tavily API base URL")

max_searches: int = Field(default=4, ge=0, description="Maximum number of searches")
max_results: int = Field(default=10, ge=1, description="Maximum number of search results")
content_limit: int = Field(default=3500, gt=0, description="Content character limit per source")
tavily_api_key: str | None = Field(default=None, description="Tavily API key")
tavily_api_base_url: str = Field(default="https://api.tavily.com", description="Tavily API base URL")


class PromptsConfig(BaseModel, extra="allow"):
Expand Down
8 changes: 5 additions & 3 deletions sgr_agent_core/services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

from sgr_agent_core.services.mcp_service import MCP2ToolConverter
from sgr_agent_core.services.prompt_loader import PromptLoader
from sgr_agent_core.services.registry import AgentRegistry, StreamingGeneratorRegistry, ToolRegistry
from sgr_agent_core.services.tavily_search import TavilySearchService
from sgr_agent_core.services.registry import (
AgentRegistry,
StreamingGeneratorRegistry,
ToolRegistry,
)
from sgr_agent_core.services.tool_instantiator import ToolInstantiator

__all__ = [
"TavilySearchService",
"MCP2ToolConverter",
"ToolRegistry",
"StreamingGeneratorRegistry",
Expand Down
107 changes: 0 additions & 107 deletions sgr_agent_core/services/tavily_search.py

This file was deleted.

16 changes: 7 additions & 9 deletions sgr_agent_core/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from sgr_agent_core.tools.final_answer_tool import FinalAnswerTool
from sgr_agent_core.tools.generate_plan_tool import GeneratePlanTool
from sgr_agent_core.tools.reasoning_tool import ReasoningTool
from sgr_agent_core.tools.web_search_tool import WebSearchTool
from sgr_agent_core.tools.web_search_tool import WebSearchConfig, WebSearchTool

__all__ = [
# Base classes
Expand All @@ -24,16 +24,14 @@
"ToolNameSelectorStub",
"NextStepToolsBuilder",
# Individual tools
"ClarificationTool",
"GeneratePlanTool",
"WebSearchTool",
"ExtractPageContentTool",
"AdaptPlanTool",
"CreateReportTool",
"AnswerTool",
"ClarificationTool",
"CreateReportTool",
"ExtractPageContentTool",
"FinalAnswerTool",
"GeneratePlanTool",
"ReasoningTool",
# Tool lists
"NextStepToolStub",
"NextStepToolsBuilder",
"WebSearchConfig",
"WebSearchTool",
]
53 changes: 44 additions & 9 deletions sgr_agent_core/tools/extract_page_content_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from typing import TYPE_CHECKING, Any

from pydantic import Field
from tavily import AsyncTavilyClient

from sgr_agent_core.agent_definition import SearchConfig
from sgr_agent_core.base_tool import BaseTool
from sgr_agent_core.services import TavilySearchService
from sgr_agent_core.models import SourceData

if TYPE_CHECKING:
from sgr_agent_core.agent_definition import AgentConfig
Expand All @@ -19,11 +20,12 @@

class ExtractPageContentTool(BaseTool):
"""Extract full detailed content from specific web pages.
Use for: Getting complete page content from URLs found in web search Returns:
Full page content in readable format (via Tavily Extract API)
Best for: Deep analysis of specific pages, extracting structured data

Usage: Call after WebSearchTool to get detailed information from promising URLs
Use for: Getting complete page content from URLs found in web search.
Returns: Full page content in readable format (via Tavily Extract API).
Best for: Deep analysis of specific pages, extracting structured data.

Usage: Call after WebSearchTool to get detailed information from promising URLs.

CRITICAL WARNINGS:
- Extracted pages may show data from DIFFERENT years/time periods than asked
Expand All @@ -38,13 +40,46 @@ class ExtractPageContentTool(BaseTool):
reasoning: str = Field(description="Why extract these specific pages")
urls: list[str] = Field(description="List of URLs to extract full content from", min_length=1, max_length=5)

@staticmethod
async def _extract(config: SearchConfig, urls: list[str]) -> list[SourceData]:
"""Extract full content from URLs via Tavily Extract API."""
logger.info(f"Tavily extract: {len(urls)} URLs")

client = AsyncTavilyClient(api_key=config.tavily_api_key, api_base_url=config.tavily_api_base_url)
response = await client.extract(urls=urls)

sources = []
for i, result in enumerate(response.get("results", [])):
if not result.get("url"):
continue

source = SourceData(
number=i,
title=result.get("url", "").split("/")[-1] or "Extracted Content",
url=result.get("url", ""),
snippet="",
full_content=result.get("raw_content", ""),
char_count=len(result.get("raw_content", "")),
)
sources.append(source)

failed_urls = response.get("failed_results", [])
if failed_urls:
logger.warning(f"Failed to extract {len(failed_urls)} URLs: {failed_urls}")

return sources

async def __call__(self, context: AgentContext, config: AgentConfig, **kwargs: Any) -> str:
"""Extract full content from specified URLs."""
search_config = SearchConfig(**kwargs)
logger.info(f"📄 Extracting content from {len(self.urls)} URLs")

self._search_service = TavilySearchService(search_config)
sources = await self._search_service.extract(urls=self.urls)
if not search_config.tavily_api_key:
return (
"Error: tavily_api_key is required for ExtractPageContentTool."
" Tavily is the only provider that supports content extraction."
)
logger.info(f"Extracting content from {len(self.urls)} URLs")

sources = await self._extract(search_config, urls=self.urls)

# Update existing sources instead of overwriting
for source in sources:
Expand Down
Loading
Loading