Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ COSMOSDB_PARTITION_KEY="/id"
SQL_DATABASE_URI="sqlite:///template_langgraph.db"
# SQL_DATABASE_URI="postgresql://user:password@localhost:5432/db"

# Azure AI Search Settings
AI_SEARCH_ENDPOINT="https://xxx.search.windows.net/"
AI_SEARCH_KEY="xxx"
AI_SEARCH_INDEX_NAME="kabuto"

# ---------
# Utilities
# ---------
Expand Down
1 change: 1 addition & 0 deletions docs/references.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/)
- [Qdrant](https://github.com/qdrant/qdrant)
- [Azure Cosmos DB No SQL](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/)
- [Azure AI Search](https://python.langchain.com/docs/integrations/vectorstores/azuresearch/)

### Services

Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"azure-cosmos>=4.9.0",
"azure-identity>=1.23.1",
"azure-search-documents>=11.5.3",
"elasticsearch>=9.1.0",
"fastapi[standard]>=0.116.1",
"httpx>=0.28.1",
Expand Down
102 changes: 102 additions & 0 deletions scripts/ai_search_operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import logging

import typer
from dotenv import load_dotenv

from template_langgraph.loggers import get_logger
from template_langgraph.tools.ai_search_tool import AiSearchClientWrapper
from template_langgraph.utilities.csv_loaders import CsvLoaderWrapper
from template_langgraph.utilities.pdf_loaders import PdfLoaderWrapper

# Typer CLI object; the commands defined in this script are registered on it
app = typer.Typer(help="AI Search operator CLI", add_completion=False)

# Module-level logger obtained from the project's logging helper
logger = get_logger(__name__)


@app.command()
def add_documents(
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Enable verbose output",
    ),
):
    """Load the PDF and CSV documents and add them to the AI Search index."""
    # --verbose lowers this module's logger to DEBUG so the per-document
    # messages emitted at the end are not filtered out by the logger level.
    if verbose:
        logger.setLevel(logging.DEBUG)

    # Load documents from PDF files
    pdf_documents = PdfLoaderWrapper().load_pdf_docs()
    logger.info(f"Loaded {len(pdf_documents)} documents from PDF.")

    # Load documents from CSV files
    csv_documents = CsvLoaderWrapper().load_csv_docs()
    logger.info(f"Loaded {len(csv_documents)} documents from CSV.")

    # Combine both sources into a single batch
    documents = pdf_documents + csv_documents
    logger.info(f"Total documents to add: {len(documents)}")

    # Add the batch to AI Search; the wrapper returns one entry per added document
    ai_search_client = AiSearchClientWrapper()
    ids = ai_search_client.add_documents(
        documents=documents,
    )
    logger.info(f"Added {len(ids)} documents to AI Search.")
    # Named doc_id rather than id so the builtin id() is not shadowed
    for doc_id in ids:
        logger.debug(f"Added document ID: {doc_id}")


@app.command()
def similarity_search(
    query: str = typer.Option(
        "禅モード",
        "--query",
        "-q",
        help="Query to search in the AI Search index",
    ),
    k: int = typer.Option(
        5,
        "--k",
        "-k",
        help="Number of results to return from the similarity search",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Enable verbose output",
    ),
):
    """Run a similarity search against the AI Search index and log the results."""
    # The individual hits are logged at DEBUG level only, so --verbose is what
    # lowers this module's logger far enough for them to pass the level check.
    if verbose:
        logger.setLevel(logging.DEBUG)

    logger.info(f"Searching AI Search with query: {query}")

    # Perform similarity search
    ai_search_client = AiSearchClientWrapper()
    documents = ai_search_client.similarity_search(
        query=query,
        k=k,  # Number of results to return
    )
    logger.info(f"Found {len(documents)} results for query: {query}")

    # Log each hit as indented JSON, numbered from 1
    for i, document in enumerate(documents, start=1):
        logger.debug("-" * 40)
        logger.debug(f"#{i}: {document.model_dump_json(indent=2)}")


if __name__ == "__main__":
    # Load variables from a .env file before dispatching to the CLI;
    # override=True lets them replace values already set in the environment.
    load_dotenv(verbose=True, override=True)
    app()
105 changes: 105 additions & 0 deletions template_langgraph/tools/ai_search_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from functools import lru_cache

from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_core.documents import Document
from langchain_core.tools import tool
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings, SettingsConfigDict

from template_langgraph.llms.azure_openais import AzureOpenAiWrapper


class Settings(BaseSettings):
    """Connection settings for Azure AI Search.

    The three fields default to placeholder strings; the model is configured
    to read a ``.env`` file, to ignore empty environment values and to ignore
    any extra keys it does not declare.
    """

    ai_search_key: str = "<your-ai-search-key>"
    ai_search_endpoint: str = "<your-ai-search-endpoint>"
    ai_search_index_name: str = "<your-ai-index-name>"

    model_config = SettingsConfigDict(
        extra="ignore",
        env_ignore_empty=True,
        env_file=".env",
    )


@lru_cache
def get_ai_search_settings() -> Settings:
    """Return the AI Search settings, constructing them on the first call only."""
    # The function takes no arguments, so lru_cache hands back the same
    # Settings object on every later call.
    settings = Settings()
    return settings


class AiSearchClientWrapper:
    """Thin wrapper around a LangChain ``AzureSearch`` vector store.

    The store is built from the AI Search settings (endpoint, key, index name)
    and uses ``AzureOpenAiWrapper().embedding_model.embed_query`` as its
    embedding function.
    """

    def __init__(
        self,
        settings: Settings | None = None,
    ):
        """Create the underlying vector store.

        Args:
            settings: AI Search settings to use. When ``None``, the cached
                settings from ``get_ai_search_settings()`` are used.
        """
        if settings is None:
            settings = get_ai_search_settings()
        self.vector_store: AzureSearch = AzureSearch(
            azure_search_endpoint=settings.ai_search_endpoint,
            azure_search_key=settings.ai_search_key,
            index_name=settings.ai_search_index_name,
            embedding_function=AzureOpenAiWrapper().embedding_model.embed_query,
        )

    def add_documents(
        self,
        documents: list[Document],
    ) -> list[str]:
        """Add documents to the Azure AI Search index.

        Args:
            documents: Documents to hand to the vector store.

        Returns:
            The list returned by the vector store's ``add_documents``
            (presumably the IDs of the added documents).
        """
        return self.vector_store.add_documents(
            documents=documents,
        )

    def similarity_search(
        self,
        query: str,
        k: int = 5,
    ) -> list[Document]:
        """Perform a similarity search in the Azure AI Search index.

        Args:
            query: Text to search for.
            k: Number of results to request from the vector store.

        Returns:
            The documents returned by the vector store's ``similarity_search``.
        """
        return self.vector_store.similarity_search(
            query=query,
            k=k,  # Number of results to return
        )


# Argument schema for the AI Search tool: the query text and the number of
# results to request. Both fields carry defaults, so neither is required.
class AiSearchInput(BaseModel):
    query: str = Field(
        "禅モード",
        description="Query to search in the AI Search index",
    )
    k: int = Field(
        5,
        description="Number of results to return from the similarity search",
    )


# Shape of a single search hit: the document text and its identifier.
# Both fields are required (no default is given).
class AiSearchOutput(BaseModel):
    content: str = Field(..., description="Content of the document")
    id: str = Field(..., description="ID of the document")


@tool(args_schema=AiSearchInput)
def search_ai_search(query: str, k: int = 5) -> list[AiSearchOutput]:
    """Search for similar documents in AI Search index.

    Args:
        query: The search query string
        k: Number of results to return (default: 5)

    Returns:
        A list of search results, one per matching document, each holding
        the document's "content" and "id"
    """
    # A new wrapper (and therefore a new vector-store client) is built on
    # every call.
    wrapper = AiSearchClientWrapper()
    documents = wrapper.similarity_search(
        query=query,
        k=k,
    )
    # Each hit is returned as a plain dict keyed like AiSearchOutput's fields,
    # not as an AiSearchOutput instance.
    return [
        {
            "content": document.page_content,
            "id": document.id,
        }
        for document in documents
    ]
2 changes: 2 additions & 0 deletions template_langgraph/tools/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from template_langgraph.llms.azure_openais import AzureOpenAiWrapper
from template_langgraph.loggers import get_logger
from template_langgraph.tools.ai_search_tool import search_ai_search
from template_langgraph.tools.cosmosdb_tool import search_cosmosdb
from template_langgraph.tools.dify_tool import run_dify_workflow
from template_langgraph.tools.elasticsearch_tool import search_elasticsearch
Expand All @@ -18,6 +19,7 @@ def get_default_tools():
logger.error(f"Error occurred while getting SQL database tools: {e}")
sql_database_tools = []
return [
search_ai_search,
search_cosmosdb,
run_dify_workflow,
search_qdrant,
Expand Down
28 changes: 28 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.