1 change: 1 addition & 0 deletions pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
     "qdrant-client>=1.15.1",
     "streamlit>=1.48.0",
     "typer>=0.16.0",
+    "youtube-transcript-api>=1.2.2",
 ]
 
 [dependency-groups]
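Since the lockfile change at the bottom of this diff shows the project is uv-managed, this dependency line would typically be produced by something like the following command (an assumption, not shown in the PR):

uv add "youtube-transcript-api>=1.2.2"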
44 changes: 37 additions & 7 deletions scripts/agent_operator.py
@@ -9,10 +9,23 @@
 from template_langgraph.agents.image_classifier_agent.models import Results
 from template_langgraph.agents.issue_formatter_agent.agent import graph as issue_formatter_agent_graph
 from template_langgraph.agents.kabuto_helpdesk_agent.agent import graph as kabuto_helpdesk_agent_graph
+from template_langgraph.agents.news_summarizer_agent.agent import MockNotifier, NewsSummarizerAgent
 from template_langgraph.agents.news_summarizer_agent.agent import (
     graph as news_summarizer_agent_graph,
 )
-from template_langgraph.agents.news_summarizer_agent.models import Article
+from template_langgraph.agents.news_summarizer_agent.models import (
+    AgentInputState,
+    AgentState,
+    Article,
+)
+from template_langgraph.agents.news_summarizer_agent.scrapers import (
+    BaseScraper,
+    HttpxScraper,
+    YouTubeTranscriptScraper,
+)
+from template_langgraph.agents.news_summarizer_agent.summarizers import (
+    LlmSummarizer,
+)
 from template_langgraph.agents.task_decomposer_agent.agent import graph as task_decomposer_agent_graph
 from template_langgraph.loggers import get_logger
 
@@ -43,6 +56,18 @@ def get_agent_graph(name: str):
     raise ValueError(f"Unknown agent name: {name}")
 
 
+def get_scraper(scraper_type: str) -> BaseScraper:
+    scraper = None
+    if scraper_type == "Httpx":
+        scraper = HttpxScraper()
+    elif scraper_type == "YouTubeTranscript":
+        scraper = YouTubeTranscriptScraper()
+
+    if not scraper:
+        raise ValueError(f"Unknown scraper type: {scraper_type}")
+    return scraper
+
+
 @app.command()
 def png(
     name: str = typer.Option(
@@ -134,23 +159,28 @@ def news_summarizer_agent(
         "-u",
         help="Comma-separated list of URLs to summarize",
     ),
+    scraper: str = typer.Option(
+        "Httpx",  # YouTubeTranscript
+        "--scraper",
+        "-s",
+        help="Scraper to use for fetching content",
+    ),
     verbose: bool = typer.Option(
         False,
         "--verbose",
         "-v",
         help="Enable verbose output",
     ),
 ):
-    from template_langgraph.agents.news_summarizer_agent.models import (
-        AgentInputState,
-        AgentState,
-    )
-
     # Set up logging
     if verbose:
         logger.setLevel(logging.DEBUG)
 
-    graph = news_summarizer_agent_graph
+    graph = NewsSummarizerAgent(
+        notifier=MockNotifier(),
+        scraper=get_scraper(scraper),
+        summarizer=LlmSummarizer(),
+    ).create_graph()
     for event in graph.stream(
         input=AgentState(
             input=AgentInputState(
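Taken together, these changes add a --scraper/-s option and build the graph per invocation via NewsSummarizerAgent(...).create_graph() instead of reusing the module-level news_summarizer_agent_graph. A plausible invocation — assuming Typer's default kebab-case command naming and that the script is run directly; the URL is a placeholder — looks like:

python scripts/agent_operator.py news-summarizer-agent \
    --urls "https://www.youtube.com/watch?v=<VIDEO_ID>" \
    --scraper YouTubeTranscript \
    --verbose

Any other --scraper value raises ValueError in get_scraper, and the hard-coded MockNotifier suggests notification delivery is stubbed out on this CLI path.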
19 changes: 17 additions & 2 deletions template_langgraph/agents/news_summarizer_agent/scrapers.py
@@ -10,6 +10,7 @@
 from abc import ABC, abstractmethod
 
 import httpx
+from youtube_transcript_api import YouTubeTranscriptApi
 
 from template_langgraph.loggers import get_logger
 
@@ -39,24 +40,38 @@ def scrape(self, url: str) -> str:  # pragma: no cover - interface
 class MockScraper(BaseScraper):
     """Deterministic scraper for tests / offline development."""
 
-    def scrape(self, url: str) -> str:  # noqa: D401
+    def scrape(self, url: str) -> str:
         logger.info(f"Mock scrape for URL: {url}")
         return "<html><body><h1>Mocked web content</h1></body></html>"
 
 
 class HttpxScraper(BaseScraper):
     """Simple httpx based scraper."""
 
-    def scrape(self, url: str) -> str:  # noqa: D401
+    def scrape(self, url: str) -> str:
         logger.info(f"Fetching URL via httpx: {url}")
         with httpx.Client() as client:
             response = client.get(url)
             response.raise_for_status()
             return response.text
 
 
+class YouTubeTranscriptScraper(BaseScraper):
+    """YouTube transcript scraper."""
+
+    def scrape(self, url: str) -> str:
+        video_id = url.split("v=")[-1].split("&")[0]
+        transcript = YouTubeTranscriptApi().fetch(
+            video_id=video_id,
+            languages=["ja", "en"],
+        )
+        text_list = [item.text for item in transcript]
+        return " ".join(text_list)
+
+
 __all__ = [
     "BaseScraper",
     "MockScraper",
     "HttpxScraper",
+    "YouTubeTranscriptScraper",
 ]
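One caveat worth noting: the new scraper's url.split("v=")[-1].split("&")[0] only handles watch?v= URLs, so a youtu.be short link or a /shorts/ path would pass garbage to the API. Below is a minimal, more defensive sketch — not part of the PR; extract_video_id and fetch_transcript_text are hypothetical helpers built on the same youtube-transcript-api >= 1.0 instance API the PR relies on:

from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi


def extract_video_id(url: str) -> str:
    parsed = urlparse(url)
    if parsed.hostname == "youtu.be":
        return parsed.path.lstrip("/")  # https://youtu.be/<id>
    if parsed.path.startswith("/shorts/"):
        return parsed.path.split("/")[2]  # https://www.youtube.com/shorts/<id>
    ids = parse_qs(parsed.query).get("v")
    if not ids:
        raise ValueError(f"Could not extract a video id from: {url}")
    return ids[0]


def fetch_transcript_text(url: str) -> str:
    transcript = YouTubeTranscriptApi().fetch(
        video_id=extract_video_id(url),
        languages=["ja", "en"],  # ordered preference, same as the PR
    )
    return " ".join(snippet.text for snippet in transcript)

The languages list is an ordered preference, so ["ja", "en"] tries Japanese first and raises if neither transcript exists — matching the PR's behavior.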
15 changes: 15 additions & 0 deletions uv.lock

Some generated files are not rendered by default.