diff --git a/.env.template b/.env.template index 3c84001..9c4b9b0 100644 --- a/.env.template +++ b/.env.template @@ -51,7 +51,7 @@ AI_SEARCH_KEY="xxx" AI_SEARCH_INDEX_NAME="kabuto" # --------- -# Utilities +# Internals # --------- ## CSV Loader Settings @@ -63,3 +63,13 @@ PDF_LOADER_DATA_DIR_PATH="./data" ## OpenTelemetry Settings OTEL_SERVICE_NAME="template-langgraph" OTEL_COLLECTOR_ENDPOINT="http://localhost:4317" + +## Scraper Settings +SCRAPER_TYPE="mock" # Options: "mock", "httpx", "youtube_transcript" + +## Summarizer Settings +SUMMARIZER_TYPE="mock" # Options: "mock", "llm" + +## Notifier Settings +NOTIFIER_TYPE="mock" # Options: "mock", "slack" +NOTIFIER_SLACK_WEBHOOK_URL="https://hooks.slack.com/services/xxx" diff --git a/docs/index.ja.md b/docs/index.ja.md index 6663b6f..52af394 100644 --- a/docs/index.ja.md +++ b/docs/index.ja.md @@ -130,7 +130,7 @@ Pydantic モデルを使用して AI 応答から構造化データを取得す - **`template_langgraph/llms/`** - LLM API ラッパー(Azure OpenAI など) - **`template_langgraph/tools/`** - 検索、データ取得用ツール実装 -- **`template_langgraph/utilities/`** - ドキュメント読み込みと処理用ヘルパー関数 +- **`template_langgraph/internals/`** - 内部ユーティリティとヘルパー関数(CSV/PDF ローダー、Otel ラッパーなど) ## サンプルコードの実行 diff --git a/docs/index.md b/docs/index.md index 795a04c..7bec53e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -130,7 +130,7 @@ Implements the supervisor pattern where one agent coordinates multiple specializ - **`template_langgraph/llms/`** - LLM API wrappers (Azure OpenAI, etc.) - **`template_langgraph/tools/`** - Tool implementations for search, data retrieval -- **`template_langgraph/utilities/`** - Helper functions for document loading and processing +- **`template_langgraph/internals/`** - Internal utilities and helper functions (CSV/PDF loaders, Otel wrappers, etc.) 
## Running the Examples diff --git a/scripts/agent_operator.py b/scripts/agent_operator.py index 0600cc7..ce63b0d 100644 --- a/scripts/agent_operator.py +++ b/scripts/agent_operator.py @@ -9,23 +9,12 @@ from template_langgraph.agents.image_classifier_agent.models import Results from template_langgraph.agents.issue_formatter_agent.agent import graph as issue_formatter_agent_graph from template_langgraph.agents.kabuto_helpdesk_agent.agent import graph as kabuto_helpdesk_agent_graph -from template_langgraph.agents.news_summarizer_agent.agent import MockNotifier, NewsSummarizerAgent -from template_langgraph.agents.news_summarizer_agent.agent import ( - graph as news_summarizer_agent_graph, -) +from template_langgraph.agents.news_summarizer_agent.agent import graph as news_summarizer_agent_graph from template_langgraph.agents.news_summarizer_agent.models import ( AgentInputState, AgentState, Article, ) -from template_langgraph.agents.news_summarizer_agent.scrapers import ( - BaseScraper, - HttpxScraper, - YouTubeTranscriptScraper, -) -from template_langgraph.agents.news_summarizer_agent.summarizers import ( - LlmSummarizer, -) from template_langgraph.agents.task_decomposer_agent.agent import graph as task_decomposer_agent_graph from template_langgraph.loggers import get_logger @@ -56,18 +45,6 @@ def get_agent_graph(name: str): raise ValueError(f"Unknown agent name: {name}") -def get_scraper(scraper_type: str) -> BaseScraper: - scraper = None - if scraper_type == "Httpx": - scraper = HttpxScraper() - elif scraper_type == "YouTubeTranscript": - scraper = YouTubeTranscriptScraper() - - if not scraper: - raise ValueError(f"Unknown scraper type: {scraper_type}") - return scraper - - @app.command() def png( name: str = typer.Option( @@ -159,12 +136,6 @@ def news_summarizer_agent( "-u", help="Comma-separated list of URLs to summarize", ), - scraper: str = typer.Option( - "Httpx", # YouTubeTranscript - "--scraper", - "-s", - help="Scraper to use for fetching content", - ), 
verbose: bool = typer.Option( False, "--verbose", @@ -176,11 +147,7 @@ def news_summarizer_agent( if verbose: logger.setLevel(logging.DEBUG) - graph = NewsSummarizerAgent( - notifier=MockNotifier(), - scraper=get_scraper(scraper), - summarizer=LlmSummarizer(), - ).create_graph() + graph = news_summarizer_agent_graph for event in graph.stream( input=AgentState( input=AgentInputState( diff --git a/scripts/ai_search_operator.py b/scripts/ai_search_operator.py index c52da87..2cf733f 100644 --- a/scripts/ai_search_operator.py +++ b/scripts/ai_search_operator.py @@ -3,10 +3,10 @@ import typer from dotenv import load_dotenv +from template_langgraph.internals.csv_loaders import CsvLoaderWrapper +from template_langgraph.internals.pdf_loaders import PdfLoaderWrapper from template_langgraph.loggers import get_logger from template_langgraph.tools.ai_search_tool import AiSearchClientWrapper -from template_langgraph.utilities.csv_loaders import CsvLoaderWrapper -from template_langgraph.utilities.pdf_loaders import PdfLoaderWrapper # Initialize the Typer application app = typer.Typer( diff --git a/scripts/cosmosdb_operator.py b/scripts/cosmosdb_operator.py index 534ed53..38b85af 100644 --- a/scripts/cosmosdb_operator.py +++ b/scripts/cosmosdb_operator.py @@ -3,10 +3,10 @@ import typer from dotenv import load_dotenv +from template_langgraph.internals.csv_loaders import CsvLoaderWrapper +from template_langgraph.internals.pdf_loaders import PdfLoaderWrapper from template_langgraph.loggers import get_logger from template_langgraph.tools.cosmosdb_tool import CosmosdbClientWrapper -from template_langgraph.utilities.csv_loaders import CsvLoaderWrapper -from template_langgraph.utilities.pdf_loaders import PdfLoaderWrapper # Initialize the Typer application app = typer.Typer( diff --git a/scripts/elasticsearch_operator.py b/scripts/elasticsearch_operator.py index b065bee..06fab7f 100644 --- a/scripts/elasticsearch_operator.py +++ b/scripts/elasticsearch_operator.py @@ -3,9 +3,9 @@ import 
typer from dotenv import load_dotenv +from template_langgraph.internals.pdf_loaders import PdfLoaderWrapper from template_langgraph.loggers import get_logger from template_langgraph.tools.elasticsearch_tool import ElasticsearchClientWrapper -from template_langgraph.utilities.pdf_loaders import PdfLoaderWrapper # Initialize the Typer application app = typer.Typer( diff --git a/scripts/otel_operator.py b/scripts/otel_operator.py index 993dadd..d90b7e1 100644 --- a/scripts/otel_operator.py +++ b/scripts/otel_operator.py @@ -4,8 +4,8 @@ import typer from dotenv import load_dotenv +from template_langgraph.internals.otel_helpers import OtelWrapper from template_langgraph.loggers import get_logger -from template_langgraph.utilities.otel_helpers import OtelWrapper # Initialize the Typer application app = typer.Typer( diff --git a/scripts/qdrant_operator.py b/scripts/qdrant_operator.py index f12e549..fec7a67 100644 --- a/scripts/qdrant_operator.py +++ b/scripts/qdrant_operator.py @@ -4,10 +4,10 @@ from dotenv import load_dotenv from qdrant_client.models import PointStruct +from template_langgraph.internals.csv_loaders import CsvLoaderWrapper from template_langgraph.llms.azure_openais import AzureOpenAiWrapper from template_langgraph.loggers import get_logger from template_langgraph.tools.qdrant_tool import QdrantClientWrapper -from template_langgraph.utilities.csv_loaders import CsvLoaderWrapper # Initialize the Typer application app = typer.Typer( diff --git a/template_langgraph/agents/news_summarizer_agent/agent.py b/template_langgraph/agents/news_summarizer_agent/agent.py index 4cdec16..453c6cd 100644 --- a/template_langgraph/agents/news_summarizer_agent/agent.py +++ b/template_langgraph/agents/news_summarizer_agent/agent.py @@ -8,40 +8,27 @@ StructuredArticle, SummarizeWebContentState, ) -from template_langgraph.agents.news_summarizer_agent.scrapers import ( - BaseScraper, - HttpxScraper, - MockScraper, -) -from 
template_langgraph.agents.news_summarizer_agent.summarizers import ( - BaseSummarizer, - LlmSummarizer, - MockSummarizer, -) +from template_langgraph.internals.notifiers import get_notifier +from template_langgraph.internals.scrapers import get_scraper +from template_langgraph.internals.summarizers import get_summarizer from template_langgraph.llms.azure_openais import AzureOpenAiWrapper from template_langgraph.loggers import get_logger logger = get_logger(__name__) -class MockNotifier: - def notify(self, id: str, body: dict) -> None: - """Simulate sending a notification to the user.""" - logger.info(f"Notification sent for request {id}: {body}") - - class NewsSummarizerAgent: def __init__( self, llm=AzureOpenAiWrapper().chat_model, - notifier=MockNotifier(), - scraper: BaseScraper = MockScraper(), - summarizer: BaseSummarizer = MockSummarizer(), + notifier=get_notifier(), + scraper=get_scraper(), + summarizer=get_summarizer(), ): self.llm = llm self.notifier = notifier - self.scraper: BaseScraper = scraper - self.summarizer: BaseSummarizer = summarizer + self.scraper = scraper + self.summarizer = summarizer def create_graph(self): """Create the main graph for the agent.""" @@ -127,23 +114,20 @@ def summarize_web_content(self, state: SummarizeWebContentState): def notify(self, state: AgentState) -> AgentState: """Send notifications to the user.""" logger.info(f"Sending notifications with state: {state}") - # Simulate sending notifications - # convert list of articles to a dictionary for notification summary = {} for i, article in enumerate(state.articles): - summary[i] = article.model_dump() + summary[i] = { + "url": article.url, + "structured_article": article.structured_article.model_dump(), + } self.notifier.notify( - id=state.input.id, - body=summary, + text=summary.__str__(), ) return state -# For testing -# graph = NewsSummarizerAgent().create_graph() - graph = NewsSummarizerAgent( - notifier=MockNotifier(), - scraper=HttpxScraper(), - 
summarizer=LlmSummarizer(), + notifier=get_notifier(), + scraper=get_scraper(), + summarizer=get_summarizer(), ).create_graph() diff --git a/template_langgraph/utilities/__init__.py b/template_langgraph/internals/__init__.py similarity index 100% rename from template_langgraph/utilities/__init__.py rename to template_langgraph/internals/__init__.py diff --git a/template_langgraph/utilities/csv_loaders.py b/template_langgraph/internals/csv_loaders.py similarity index 100% rename from template_langgraph/utilities/csv_loaders.py rename to template_langgraph/internals/csv_loaders.py diff --git a/template_langgraph/internals/notifiers.py b/template_langgraph/internals/notifiers.py new file mode 100644 index 0000000..724c6fc --- /dev/null +++ b/template_langgraph/internals/notifiers.py @@ -0,0 +1,91 @@ +"""Notifier interfaces and implementations for NewsSummarizerAgent. + +This module defines an abstract base notifier so different notification strategies +(mock, Slack webhook, future email, etc.) can be plugged into the agent +without changing orchestration logic. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from enum import Enum +from functools import lru_cache + +import httpx +from pydantic_settings import BaseSettings, SettingsConfigDict + +from template_langgraph.loggers import get_logger + +logger = get_logger(__name__) + + +class NotifierType(str, Enum): + MOCK = "mock" + SLACK = "slack" + + +class Settings(BaseSettings): + notifier_type: NotifierType = NotifierType.MOCK + notifier_slack_webhook_url: str = "https://hooks.slack.com/services/Txxx/Bxxx/xxx" + + model_config = SettingsConfigDict( + env_file=".env", + env_ignore_empty=True, + extra="ignore", + ) + + +@lru_cache +def get_notifier_settings() -> Settings: + """Get notifier settings.""" + return Settings() + + +class BaseNotifier(ABC): + """Abstract base notifier.""" + + @abstractmethod + def notify(self, text: str): + """Send a notification with the given text. 
+ + Args: + text: The text to include in the notification. + + """ + raise NotImplementedError + + +class MockNotifier(BaseNotifier): + """Deterministic notifier for tests / offline development.""" + + def notify(self, text: str): + logger.info(f"Mock notify with text: {text}") + + +class SlackNotifier(BaseNotifier): + """Slack notifier for sending notifications to a Slack channel.""" + + def __init__(self, settings=get_notifier_settings()): + self.webhook_url = settings.notifier_slack_webhook_url + + def notify(self, text: str): + logger.info(f"Slack notify with text: {text}") + with httpx.Client() as client: + client.post( + self.webhook_url, + json={ + "text": text, + }, + ) + + +def get_notifier(settings: Settings = None) -> BaseNotifier: + if settings is None: + settings = get_notifier_settings() + + if settings.notifier_type == NotifierType.MOCK: + return MockNotifier() + elif settings.notifier_type == NotifierType.SLACK: + return SlackNotifier(settings) + else: + raise ValueError(f"Unknown notifier type: {settings.notifier_type}") diff --git a/template_langgraph/utilities/otel_helpers.py b/template_langgraph/internals/otel_helpers.py similarity index 100% rename from template_langgraph/utilities/otel_helpers.py rename to template_langgraph/internals/otel_helpers.py diff --git a/template_langgraph/utilities/pdf_loaders.py b/template_langgraph/internals/pdf_loaders.py similarity index 100% rename from template_langgraph/utilities/pdf_loaders.py rename to template_langgraph/internals/pdf_loaders.py diff --git a/template_langgraph/agents/news_summarizer_agent/scrapers.py b/template_langgraph/internals/scrapers.py similarity index 65% rename from template_langgraph/agents/news_summarizer_agent/scrapers.py rename to template_langgraph/internals/scrapers.py index 0ece189..e9687e8 100644 --- a/template_langgraph/agents/news_summarizer_agent/scrapers.py +++ b/template_langgraph/internals/scrapers.py @@ -8,8 +8,11 @@ from __future__ import annotations from abc import 
ABC, abstractmethod +from enum import Enum +from functools import lru_cache import httpx +from pydantic_settings import BaseSettings, SettingsConfigDict from youtube_transcript_api import YouTubeTranscriptApi from template_langgraph.loggers import get_logger @@ -17,6 +20,28 @@ logger = get_logger(__name__) +class ScraperType(str, Enum): + MOCK = "mock" + HTTPX = "httpx" + YOUTUBE_TRANSCRIPT = "youtube_transcript" + + +class Settings(BaseSettings): + scraper_type: ScraperType = ScraperType.MOCK + + model_config = SettingsConfigDict( + env_file=".env", + env_ignore_empty=True, + extra="ignore", + ) + + +@lru_cache +def get_scraper_settings() -> Settings: + """Get scraper settings.""" + return Settings() + + class BaseScraper(ABC): """Abstract base scraper. @@ -60,6 +85,7 @@ class YouTubeTranscriptScraper(BaseScraper): """YouTube transcript scraper.""" def scrape(self, url: str) -> str: + logger.info(f"Fetching YouTube transcript for URL: {url}") video_id = url.split("v=")[-1].split("&")[0] transcript = YouTubeTranscriptApi().fetch( video_id=video_id, @@ -69,9 +95,15 @@ def scrape(self, url: str) -> str: return " ".join(text_list) -__all__ = [ - "BaseScraper", - "MockScraper", - "HttpxScraper", - "YouTubeTranscriptScraper", -] +def get_scraper(settings: Settings = None) -> BaseScraper: + if settings is None: + settings = get_scraper_settings() + + if settings.scraper_type == ScraperType.MOCK: + return MockScraper() + elif settings.scraper_type == ScraperType.HTTPX: + return HttpxScraper() + elif settings.scraper_type == ScraperType.YOUTUBE_TRANSCRIPT: + return YouTubeTranscriptScraper() + else: + raise ValueError(f"Unknown scraper type: {settings.scraper_type}") diff --git a/template_langgraph/agents/news_summarizer_agent/summarizers.py b/template_langgraph/internals/summarizers.py similarity index 65% rename from template_langgraph/agents/news_summarizer_agent/summarizers.py rename to template_langgraph/internals/summarizers.py index 4d88cc0..0fea972 100644 --- 
a/template_langgraph/agents/news_summarizer_agent/summarizers.py +++ b/template_langgraph/internals/summarizers.py @@ -3,9 +3,12 @@ from __future__ import annotations from abc import ABC, abstractmethod +from enum import Enum +from functools import lru_cache from typing import Any from langchain_core.language_models.chat_models import BaseChatModel +from pydantic_settings import BaseSettings, SettingsConfigDict from template_langgraph.agents.news_summarizer_agent.models import StructuredArticle from template_langgraph.llms.azure_openais import AzureOpenAiWrapper @@ -14,6 +17,29 @@ logger = get_logger(__name__) +class SummarizerType(str, Enum): + """Enumeration of available summarizer types.""" + + MOCK = "mock" + LLM = "llm" + + +class Settings(BaseSettings): + summarizer_type: SummarizerType = SummarizerType.MOCK + + model_config = SettingsConfigDict( + env_file=".env", + env_ignore_empty=True, + extra="ignore", + ) + + +@lru_cache +def get_summarizer_settings() -> Settings: + """Get summarizer settings.""" + return Settings() + + class BaseSummarizer(ABC): """Abstract base summarizer returning a StructuredArticle.""" @@ -52,8 +78,13 @@ def summarize(self, prompt: str, content: str) -> StructuredArticle: # noqa: D4 ) -__all__ = [ - "BaseSummarizer", - "MockSummarizer", - "LlmSummarizer", -] +def get_summarizer(settings: Settings = None) -> BaseSummarizer: + if settings is None: + settings = get_summarizer_settings() + + if settings.summarizer_type == SummarizerType.MOCK: + return MockSummarizer() + elif settings.summarizer_type == SummarizerType.LLM: + return LlmSummarizer() + else: + raise ValueError(f"Unknown summarizer type: {settings.summarizer_type}") diff --git a/tests/utilities/__init__.py b/tests/internals/__init__.py similarity index 100% rename from tests/utilities/__init__.py rename to tests/internals/__init__.py diff --git a/tests/utilities/test_csv_loaders.py b/tests/internals/test_csv_loaders.py similarity index 99% rename from 
tests/utilities/test_csv_loaders.py rename to tests/internals/test_csv_loaders.py index 0f11001..8de5b74 100644 --- a/tests/utilities/test_csv_loaders.py +++ b/tests/internals/test_csv_loaders.py @@ -5,7 +5,7 @@ from langchain_core.documents import Document -from template_langgraph.utilities.csv_loaders import ( +from template_langgraph.internals.csv_loaders import ( CsvLoaderWrapper, Settings, get_csv_loader_settings, diff --git a/tests/utilities/test_pdf_loaders.py b/tests/internals/test_pdf_loaders.py similarity index 90% rename from tests/utilities/test_pdf_loaders.py rename to tests/internals/test_pdf_loaders.py index 8edce55..045d6e0 100644 --- a/tests/utilities/test_pdf_loaders.py +++ b/tests/internals/test_pdf_loaders.py @@ -3,7 +3,7 @@ from langchain_core.documents import Document -from template_langgraph.utilities.pdf_loaders import ( +from template_langgraph.internals.pdf_loaders import ( PdfLoaderWrapper, Settings, get_pdf_loader_settings, @@ -54,8 +54,8 @@ def test_init_with_custom_settings(self): wrapper = PdfLoaderWrapper(settings=custom_settings) assert wrapper.settings.pdf_loader_data_dir_path == "/custom/path" - @patch("template_langgraph.utilities.pdf_loaders.glob") - @patch("template_langgraph.utilities.pdf_loaders.PyPDFLoader") + @patch("template_langgraph.internals.pdf_loaders.glob") + @patch("template_langgraph.internals.pdf_loaders.PyPDFLoader") def test_load_pdf_docs_no_files(self, mock_pdf_loader, mock_glob): """Test load_pdf_docs when no PDF files are found.""" mock_glob.return_value = [] @@ -69,8 +69,8 @@ def test_load_pdf_docs_no_files(self, mock_pdf_loader, mock_glob): recursive=True, ) - @patch("template_langgraph.utilities.pdf_loaders.glob") - @patch("template_langgraph.utilities.pdf_loaders.PyPDFLoader") + @patch("template_langgraph.internals.pdf_loaders.glob") + @patch("template_langgraph.internals.pdf_loaders.PyPDFLoader") def test_load_pdf_docs_with_files(self, mock_pdf_loader, mock_glob): """Test load_pdf_docs when PDF files are 
found.""" # Setup mock data @@ -106,8 +106,8 @@ def test_load_pdf_docs_with_files(self, mock_pdf_loader, mock_glob): mock_loader_instance1.load_and_split.assert_called_once() mock_loader_instance2.load_and_split.assert_called_once() - @patch("template_langgraph.utilities.pdf_loaders.glob") - @patch("template_langgraph.utilities.pdf_loaders.PyPDFLoader") + @patch("template_langgraph.internals.pdf_loaders.glob") + @patch("template_langgraph.internals.pdf_loaders.PyPDFLoader") def test_load_pdf_docs_with_custom_data_dir(self, mock_pdf_loader, mock_glob): """Test load_pdf_docs with custom data directory.""" custom_settings = Settings(pdf_loader_data_dir_path="/custom/data") @@ -121,8 +121,8 @@ def test_load_pdf_docs_with_custom_data_dir(self, mock_pdf_loader, mock_glob): recursive=True, ) - @patch("template_langgraph.utilities.pdf_loaders.glob") - @patch("template_langgraph.utilities.pdf_loaders.PyPDFLoader") + @patch("template_langgraph.internals.pdf_loaders.glob") + @patch("template_langgraph.internals.pdf_loaders.PyPDFLoader") def test_load_pdf_docs_text_splitter_configuration(self, mock_pdf_loader, mock_glob): """Test that text splitter is configured correctly.""" mock_glob.return_value = ["./data/test.pdf"]