From 33a59aac7b6a6509ff8a9b98bc74a84b4a9b7732 Mon Sep 17 00:00:00 2001 From: ks6088ts Date: Tue, 5 Aug 2025 17:38:20 +0900 Subject: [PATCH 1/2] add cosmosdb tool --- .env.template | 7 + docs/references.ja.md | 1 + docs/references.md | 1 + pyproject.toml | 1 + scripts/cosmosdb_operator.py | 104 ++++++++++++ scripts/test_all.sh | 5 + .../agents/chat_with_tools_agent/agent.py | 2 + template_langgraph/tools/cosmosdb_tool.py | 160 ++++++++++++++++++ uv.lock | 29 ++++ 9 files changed, 310 insertions(+) create mode 100644 scripts/cosmosdb_operator.py create mode 100644 template_langgraph/tools/cosmosdb_tool.py diff --git a/.env.template b/.env.template index 14fc995..b7451c0 100644 --- a/.env.template +++ b/.env.template @@ -33,6 +33,13 @@ ELASTICSEARCH_URL="http://localhost:9200" DIFY_API_URL="https://api.dify.ai/v1" DIFY_API_KEY="xxx" +## Cosmos DB Settings +COSMOSDB_HOST="https://xxx.documents.azure.com:443/" +COSMOSDB_KEY="xxx" +COSMOSDB_DATABASE_NAME="langgraph" +COSMOSDB_CONTAINER_NAME="docs_kabuto" +COSMOSDB_PARTITION_KEY="/id" + # --------- # Utilities # --------- diff --git a/docs/references.ja.md b/docs/references.ja.md index 5e7e371..0344afe 100644 --- a/docs/references.ja.md +++ b/docs/references.ja.md @@ -19,3 +19,4 @@ - [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/) - [Qdrant](https://github.com/qdrant/qdrant) +- [Azure Cosmos DB No SQL](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/) diff --git a/docs/references.md b/docs/references.md index 59e6f16..6b9b0d8 100644 --- a/docs/references.md +++ b/docs/references.md @@ -19,3 +19,4 @@ - [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/) - [Qdrant](https://github.com/qdrant/qdrant) +- [Azure Cosmos DB No SQL](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/) diff --git a/pyproject.toml b/pyproject.toml index 95e325a..91cfdd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,7 @@ description = "A GitHub template repository for Python" readme = "README.md" requires-python = ">=3.10" dependencies = [ + "azure-cosmos>=4.9.0", "elasticsearch>=9.1.0", "httpx>=0.28.1", "langchain-community>=0.3.27", diff --git a/scripts/cosmosdb_operator.py b/scripts/cosmosdb_operator.py new file mode 100644 index 0000000..cddcded --- /dev/null +++ b/scripts/cosmosdb_operator.py @@ -0,0 +1,104 @@ +import logging + +import typer +from dotenv import load_dotenv + +from template_langgraph.loggers import get_logger +from template_langgraph.tools.cosmosdb_tool import CosmosdbClientWrapper +from template_langgraph.utilities.csv_loaders import CsvLoaderWrapper +from template_langgraph.utilities.pdf_loaders import PdfLoaderWrapper + +# Initialize the Typer application +app = typer.Typer( + add_completion=False, + help="Cosmos DB operator CLI", +) + +# Set up logging +logger = get_logger(__name__) + + +@app.command() +def add_documents( + verbose: bool = typer.Option( + False, + "--verbose", + "-v", + help="Enable verbose output", + ), +): + # Set up logging + if verbose: + logger.setLevel(logging.DEBUG) + + # Load documents from PDF files + pdf_documents = PdfLoaderWrapper().load_pdf_docs() + logger.info(f"Loaded {len(pdf_documents)} documents from PDF.") + + # Load documents from CSV files + csv_documents = CsvLoaderWrapper().load_csv_docs() + logger.info(f"Loaded {len(csv_documents)} documents from CSV.") + + # Combine all documents + documents = pdf_documents + csv_documents + logger.info(f"Total documents to add: {len(documents)}") + + # Add documents to Cosmos DB + cosmosdb_client = CosmosdbClientWrapper() + ids = cosmosdb_client.add_documents( + documents=documents, + ) + logger.info(f"Added {len(ids)} documents to Cosmos DB.") + for id in ids: + logger.debug(f"Added document ID: {id}") + + # assert cosmosdb_client.delete_documents(ids=ids), "Failed to delete documents from Cosmos DB" + + +@app.command() +def similarity_search( + query: str = typer.Option( + "禅モード", + "--query", + "-q", + help="Query to search in the Cosmos DB index", + ), + k: int = typer.Option( + 5, + "--k", + "-k", + help="Number of results to return from the similarity search", + ), + verbose: bool = typer.Option( + False, + "--verbose", + "-v", + help="Enable verbose output", + ), +): + # Set up logging + if verbose: + logger.setLevel(logging.DEBUG) + + logger.info(f"Searching Cosmos DB with query: {query}") + + # Perform similarity search + cosmosdb_client = CosmosdbClientWrapper() + documents = cosmosdb_client.similarity_search( + query=query, + k=k, # Number of results to return + ) + logger.info(f"Found {len(documents)} results for query: {query}") + + # Log the results + for i, document in enumerate(documents, start=1): + logger.debug("-" * 40) + logger.debug(f"#{i}: {document.model_dump_json(indent=2)}") + + +if __name__ == "__main__": + load_dotenv( + override=True, + verbose=True, + ) + app() diff --git a/scripts/test_all.sh b/scripts/test_all.sh index d859cab..359e6fd 100644 --- a/scripts/test_all.sh +++ b/scripts/test_all.sh @@ -19,6 +19,11 @@ uv run python scripts/elasticsearch_operator.py create-index --index-name docs_k uv run python scripts/elasticsearch_operator.py add-documents --index-name docs_kabuto --verbose uv run python scripts/elasticsearch_operator.py search-documents --index-name docs_kabuto --query "禅モード" --verbose +# Azure Cosmos DB NoSQL +uv run python scripts/cosmosdb_operator.py --help +uv run python scripts/cosmosdb_operator.py add-documents --verbose +uv run python scripts/cosmosdb_operator.py similarity-search --query "禅モード" --k 3 --verbose + # Agents ## Draw agent graph diff --git a/template_langgraph/agents/chat_with_tools_agent/agent.py b/template_langgraph/agents/chat_with_tools_agent/agent.py index 89cd856..730f144 100644 --- a/template_langgraph/agents/chat_with_tools_agent/agent.py +++ b/template_langgraph/agents/chat_with_tools_agent/agent.py @@ -6,6 +6,7 @@ from template_langgraph.agents.chat_with_tools_agent.models import AgentState from template_langgraph.llms.azure_openais import AzureOpenAiWrapper from template_langgraph.loggers import get_logger +from template_langgraph.tools.cosmosdb_tool import search_cosmosdb from template_langgraph.tools.dify_tool import run_dify_workflow from template_langgraph.tools.elasticsearch_tool import search_elasticsearch from template_langgraph.tools.qdrant_tool import search_qdrant @@ -41,6 +42,7 @@ class ChatWithToolsAgent: def __init__(self): self.llm = AzureOpenAiWrapper().chat_model self.tools = [ + search_cosmosdb, run_dify_workflow, search_qdrant, search_elasticsearch, diff --git a/template_langgraph/tools/cosmosdb_tool.py b/template_langgraph/tools/cosmosdb_tool.py new file mode 100644 index 0000000..6982d98 --- /dev/null +++ b/template_langgraph/tools/cosmosdb_tool.py @@ -0,0 +1,160 @@ +from functools import lru_cache + +from azure.cosmos import CosmosClient, PartitionKey +from langchain_community.vectorstores.azure_cosmos_db_no_sql import ( + AzureCosmosDBNoSqlVectorSearch, +) +from langchain_core.documents import Document +from langchain_core.tools import tool +from pydantic import BaseModel, Field +from pydantic_settings import BaseSettings, SettingsConfigDict + +from template_langgraph.llms.azure_openais import AzureOpenAiWrapper + + +class Settings(BaseSettings): + cosmosdb_host: str = "" + cosmosdb_key: str = "" + cosmosdb_database_name: str = "template_langgraph" + cosmosdb_container_name: str = "kabuto" + cosmosdb_partition_key: str = "/id" + + model_config = SettingsConfigDict( + env_file=".env", + env_ignore_empty=True, + extra="ignore", + ) + + +@lru_cache +def get_cosmosdb_settings() -> Settings: + """Get Cosmos DB settings.""" + return Settings() + + +class CosmosdbClientWrapper: + def __init__( + self, + settings: Settings = None, + ): + if settings is None: + settings = get_cosmosdb_settings() + self.vector_store = AzureCosmosDBNoSqlVectorSearch( + cosmos_client=CosmosClient( + url=settings.cosmosdb_host, + credential=settings.cosmosdb_key, + ), + embedding=AzureOpenAiWrapper().embedding_model, + vector_embedding_policy={ + "vectorEmbeddings": [ + { + "path": "/embedding", + "dataType": "float32", + "distanceFunction": "cosine", + "dimensions": 1536, + } + ] + }, + indexing_policy={ + "indexingMode": "consistent", + "includedPaths": [ + {"path": "/*"}, + ], + "excludedPaths": [ + {"path": '/"_etag"/?'}, + ], + "vectorIndexes": [ + {"path": "/embedding", "type": "diskANN"}, + ], + "fullTextIndexes": [ + {"path": "/text"}, + ], + }, + cosmos_container_properties={ + "partition_key": PartitionKey(path=settings.cosmosdb_partition_key), + }, + cosmos_database_properties={}, + full_text_policy={ + "defaultLanguage": "en-US", + "fullTextPaths": [ + { + "path": "/text", + "language": "en-US", + } + ], + }, + database_name=settings.cosmosdb_database_name, + container_name=settings.cosmosdb_container_name, + ) + + def add_documents( + self, + documents: list[Document], + ) -> list[str]: + """Add documents to a Cosmos DB container.""" + return self.vector_store.add_documents( + documents=documents, + ) + + def delete_documents( + self, + ids: list[str], + ) -> bool | None: + """Delete documents from a Cosmos DB container.""" + return self.vector_store.delete( + ids=ids, + ) + + def similarity_search( + self, + query: str, + k: int = 5, + ) -> list[Document]: + """Perform a similarity search in the Cosmos DB index.""" + return self.vector_store.similarity_search( + query=query, + k=k, # Number of results to return + ) + + +class CosmosdbInput(BaseModel): + query: str = Field( + default="禅モード", + description="Query to search in the Cosmos DB index", + ) + k: int = Field( + default=5, + description="Number of results to return from the similarity search", + ) + + +class CosmosdbOutput(BaseModel): + content: str = Field(description="Content of the document") + id: str = Field(description="ID of the document") + + +@tool(args_schema=CosmosdbInput) +def search_cosmosdb(query: str, k: int = 5) -> list[CosmosdbOutput]: + """Search for similar documents in CosmosDB vector store. + + Args: + query: The search query string + k: Number of results to return (default: 5) + + Returns: + CosmosdbOutput: A Pydantic model containing the search results + """ + wrapper = CosmosdbClientWrapper() + documents = wrapper.similarity_search( + query=query, + k=k, + ) + outputs = [] + for document in documents: + outputs.append( + { + "content": document.page_content, + "id": document.id, + } + ) + return outputs diff --git a/uv.lock b/uv.lock index 07f6ac0..a271f8c 100644 --- a/uv.lock +++ b/uv.lock @@ -281,6 +281,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, ] +[[package]] +name = "azure-core" +version = "1.35.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "six" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/89/f53968635b1b2e53e4aad2dd641488929fef4ca9dfb0b97927fa7697ddf3/azure_core-1.35.0.tar.gz", hash = "sha256:c0be528489485e9ede59b6971eb63c1eaacf83ef53001bfe3904e475e972be5c", size = 339689, upload-time = "2025-07-03T00:55:23.496Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/78/bf94897361fdd650850f0f2e405b2293e2f12808239046232bdedf554301/azure_core-1.35.0-py3-none-any.whl", hash = "sha256:8db78c72868a58f3de8991eb4d22c4d368fae226dac1002998d6c50437e7dad1", size = 210708, upload-time = "2025-07-03T00:55:25.238Z" }, +] + +[[package]] +name = "azure-cosmos" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/be/7c/a4e7810f85e7f83d94265ef5ff0fb1efad55a768de737d940151ea2eec45/azure_cosmos-4.9.0.tar.gz", hash = "sha256:c70db4cbf55b0ff261ed7bb8aa325a5dfa565d3c6eaa43d75d26ae5e2ad6d74f", size = 1824155, upload-time = "2024-11-19T04:09:30.195Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/dc/380f843744535497acd0b85aacb59565c84fc28bf938c8d6e897a858cd95/azure_cosmos-4.9.0-py3-none-any.whl", hash = "sha256:3b60eaa01a16a857d0faf0cec304bac6fa8620a81bc268ce760339032ef617fe", size = 303157, upload-time = "2024-11-19T04:09:32.148Z" }, +] + [[package]] name = "babel" version = "2.17.0" @@ -3675,6 +3702,7 @@ name = "template-langgraph" version = "0.0.1" source = { editable = "." } dependencies = [ + { name = "azure-cosmos" }, { name = "elasticsearch" }, { name = "httpx" }, { name = "langchain-community" }, @@ -3708,6 +3736,7 @@ docs = [ [package.metadata] requires-dist = [ + { name = "azure-cosmos", specifier = ">=4.9.0" }, { name = "elasticsearch", specifier = ">=9.1.0" }, { name = "httpx", specifier = ">=0.28.1" }, { name = "langchain-community", specifier = ">=0.3.27" }, From 8c55c5f53f4b615ee3b2ad511f4ddd700cbcd67d Mon Sep 17 00:00:00 2001 From: ks6088ts Date: Tue, 5 Aug 2025 17:42:16 +0900 Subject: [PATCH 2/2] add default tool set --- .../agents/chat_with_tools_agent/agent.py | 12 ++---------- .../agents/kabuto_helpdesk_agent/agent.py | 12 ++---------- template_langgraph/tools/common.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 20 deletions(-) create mode 100644 template_langgraph/tools/common.py diff --git a/template_langgraph/agents/chat_with_tools_agent/agent.py b/template_langgraph/agents/chat_with_tools_agent/agent.py index 730f144..c84cc2c 100644 --- a/template_langgraph/agents/chat_with_tools_agent/agent.py +++ b/template_langgraph/agents/chat_with_tools_agent/agent.py @@ -6,10 +6,7 @@ from template_langgraph.agents.chat_with_tools_agent.models import AgentState from template_langgraph.llms.azure_openais import AzureOpenAiWrapper from template_langgraph.loggers import get_logger -from template_langgraph.tools.cosmosdb_tool import search_cosmosdb -from template_langgraph.tools.dify_tool import run_dify_workflow -from template_langgraph.tools.elasticsearch_tool import search_elasticsearch -from template_langgraph.tools.qdrant_tool import search_qdrant +from template_langgraph.tools.common import DEFAULT_TOOLS logger = get_logger(__name__) @@ -41,12 +38,7 @@ def __call__(self, inputs: dict): class ChatWithToolsAgent: def __init__(self): self.llm = AzureOpenAiWrapper().chat_model - self.tools = [ - search_cosmosdb, - run_dify_workflow, - search_qdrant, - search_elasticsearch, - ] + self.tools = DEFAULT_TOOLS def create_graph(self): """Create the main graph for the agent.""" diff --git a/template_langgraph/agents/kabuto_helpdesk_agent/agent.py b/template_langgraph/agents/kabuto_helpdesk_agent/agent.py index b7341aa..3d676dc 100644 --- a/template_langgraph/agents/kabuto_helpdesk_agent/agent.py +++ b/template_langgraph/agents/kabuto_helpdesk_agent/agent.py @@ -2,9 +2,7 @@ from template_langgraph.llms.azure_openais import AzureOpenAiWrapper from template_langgraph.loggers import get_logger -from template_langgraph.tools.dify_tool import run_dify_workflow -from template_langgraph.tools.elasticsearch_tool import search_elasticsearch -from template_langgraph.tools.qdrant_tool import search_qdrant +from template_langgraph.tools.common import DEFAULT_TOOLS logger = get_logger(__name__) @@ -12,13 +10,7 @@ class KabutoHelpdeskAgent: def __init__(self, tools=None): if tools is None: - # Default tool for searching Qdrant - tools = [ - run_dify_workflow, - search_qdrant, - search_elasticsearch, - # Add other tools as needed - ] + tools = DEFAULT_TOOLS self.agent = create_react_agent( model=AzureOpenAiWrapper().chat_model, tools=tools, diff --git a/template_langgraph/tools/common.py b/template_langgraph/tools/common.py new file mode 100644 index 0000000..bc320f9 --- /dev/null +++ b/template_langgraph/tools/common.py @@ -0,0 +1,11 @@ +from template_langgraph.tools.cosmosdb_tool import search_cosmosdb +from template_langgraph.tools.dify_tool import run_dify_workflow +from template_langgraph.tools.elasticsearch_tool import search_elasticsearch +from template_langgraph.tools.qdrant_tool import search_qdrant + +DEFAULT_TOOLS = [ + search_cosmosdb, + run_dify_workflow, + search_qdrant, + search_elasticsearch, +]