diff --git a/pyproject.toml b/pyproject.toml index 921fdc8..01ec327 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "qdrant-client>=1.15.1", "streamlit>=1.48.0", "typer>=0.16.0", + "youtube-transcript-api>=1.2.2", ] [dependency-groups] diff --git a/scripts/agent_operator.py b/scripts/agent_operator.py index d8065ee..0600cc7 100644 --- a/scripts/agent_operator.py +++ b/scripts/agent_operator.py @@ -9,10 +9,23 @@ from template_langgraph.agents.image_classifier_agent.models import Results from template_langgraph.agents.issue_formatter_agent.agent import graph as issue_formatter_agent_graph from template_langgraph.agents.kabuto_helpdesk_agent.agent import graph as kabuto_helpdesk_agent_graph +from template_langgraph.agents.news_summarizer_agent.agent import MockNotifier, NewsSummarizerAgent from template_langgraph.agents.news_summarizer_agent.agent import ( graph as news_summarizer_agent_graph, ) -from template_langgraph.agents.news_summarizer_agent.models import Article +from template_langgraph.agents.news_summarizer_agent.models import ( + AgentInputState, + AgentState, + Article, +) +from template_langgraph.agents.news_summarizer_agent.scrapers import ( + BaseScraper, + HttpxScraper, + YouTubeTranscriptScraper, +) +from template_langgraph.agents.news_summarizer_agent.summarizers import ( + LlmSummarizer, +) from template_langgraph.agents.task_decomposer_agent.agent import graph as task_decomposer_agent_graph from template_langgraph.loggers import get_logger @@ -43,6 +56,18 @@ def get_agent_graph(name: str): raise ValueError(f"Unknown agent name: {name}") +def get_scraper(scraper_type: str) -> BaseScraper: + scraper = None + if scraper_type == "Httpx": + scraper = HttpxScraper() + elif scraper_type == "YouTubeTranscript": + scraper = YouTubeTranscriptScraper() + + if not scraper: + raise ValueError(f"Unknown scraper type: {scraper_type}") + return scraper + + @app.command() def png( name: str = typer.Option( @@ -134,6 +159,12 @@ def news_summarizer_agent( "-u", help="Comma-separated list of URLs to summarize", ), + scraper: str = typer.Option( + "Httpx", # YouTubeTranscript + "--scraper", + "-s", + help="Scraper to use for fetching content", + ), verbose: bool = typer.Option( False, "--verbose", @@ -141,16 +172,15 @@ def news_summarizer_agent( help="Enable verbose output", ), ): - from template_langgraph.agents.news_summarizer_agent.models import ( - AgentInputState, - AgentState, - ) - # Set up logging if verbose: logger.setLevel(logging.DEBUG) - graph = news_summarizer_agent_graph + graph = NewsSummarizerAgent( + notifier=MockNotifier(), + scraper=get_scraper(scraper), + summarizer=LlmSummarizer(), + ).create_graph() for event in graph.stream( input=AgentState( input=AgentInputState( diff --git a/template_langgraph/agents/news_summarizer_agent/scrapers.py b/template_langgraph/agents/news_summarizer_agent/scrapers.py index 99fdcc2..0ece189 100644 --- a/template_langgraph/agents/news_summarizer_agent/scrapers.py +++ b/template_langgraph/agents/news_summarizer_agent/scrapers.py @@ -10,6 +10,7 @@ from abc import ABC, abstractmethod import httpx +from youtube_transcript_api import YouTubeTranscriptApi from template_langgraph.loggers import get_logger @@ -39,7 +40,7 @@ def scrape(self, url: str) -> str: # pragma: no cover - interface class MockScraper(BaseScraper): """Deterministic scraper for tests / offline development.""" - def scrape(self, url: str) -> str: # noqa: D401 + def scrape(self, url: str) -> str: logger.info(f"Mock scrape for URL: {url}") return "