ks6088ts-labs · ks6088ts · Aug 5, 2025 · Aug 5, 2025 · Aug 5, 2025
diff --git a/.env.template b/.env.template
@@ -33,6 +33,13 @@ ELASTICSEARCH_URL="http://localhost:9200"
 DIFY_API_URL="https://api.dify.ai/v1"
 DIFY_API_KEY="xxx"
 
+## Cosmos DB Settings
+COSMOSDB_HOST="https://xxx.documents.azure.com:443/"
+COSMOSDB_KEY="xxx"
+COSMOSDB_DATABASE_NAME="langgraph"
+COSMOSDB_CONTAINER_NAME="docs_kabuto"
+COSMOSDB_PARTITION_KEY="/id"
+
 # ---------
 # Utilities
 # ---------

diff --git a/docs/references.ja.md b/docs/references.ja.md
@@ -19,3 +19,4 @@
 
 - [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/)
 - [Qdrant](https://github.com/qdrant/qdrant)
+- [Azure Cosmos DB No SQL](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/)
diff --git a/docs/references.md b/docs/references.md
@@ -19,3 +19,4 @@
 
 - [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/)
 - [Qdrant](https://github.com/qdrant/qdrant)
+- [Azure Cosmos DB No SQL](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/)
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,6 +5,7 @@ description = "A GitHub template repository for Python"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
+    "azure-cosmos>=4.9.0",
     "elasticsearch>=9.1.0",
     "httpx>=0.28.1",
     "langchain-community>=0.3.27",

diff --git a/scripts/cosmosdb_operator.py b/scripts/cosmosdb_operator.py
@@ -0,0 +1,104 @@
+import logging
+
+import typer
+from dotenv import load_dotenv
+
+from template_langgraph.loggers import get_logger
+from template_langgraph.tools.cosmosdb_tool import CosmosdbClientWrapper
+from template_langgraph.utilities.csv_loaders import CsvLoaderWrapper
+from template_langgraph.utilities.pdf_loaders import PdfLoaderWrapper
+
+# Typer application that hosts the Cosmos DB maintenance commands in this script
+app = typer.Typer(help="Cosmos DB operator CLI", add_completion=False)
+
+# Module-level logger obtained from the project's logger factory
+logger = get_logger(__name__)
+
+
+@app.command()
+def add_documents(
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        "-v",
+        help="Enable verbose output",
+    ),
+):
+    """Load the PDF and CSV documents and add them to Cosmos DB."""
+    # --verbose lowers the module logger to DEBUG so that every added
+    # document ID is logged, not only the summary counts.
+    if verbose:
+        logger.setLevel(logging.DEBUG)
+
+    # Load documents from PDF files
+    pdf_documents = PdfLoaderWrapper().load_pdf_docs()
+    logger.info(f"Loaded {len(pdf_documents)} documents from PDF.")
+
+    # Load documents from CSV files
+    csv_documents = CsvLoaderWrapper().load_csv_docs()
+    logger.info(f"Loaded {len(csv_documents)} documents from CSV.")
+
+    # Combine all documents
+    documents = pdf_documents + csv_documents
+    logger.info(f"Total documents to add: {len(documents)}")
+
+    # Add documents to Cosmos DB
+    cosmosdb_client = CosmosdbClientWrapper()
+    ids = cosmosdb_client.add_documents(
+        documents=documents,
+    )
+    logger.info(f"Added {len(ids)} documents to Cosmos DB.")
+    # Named document_id so the builtin id() is not shadowed
+    for document_id in ids:
+        logger.debug(f"Added document ID: {document_id}")
+
+
+@app.command()
+def similarity_search(
+    query: str = typer.Option(
+        "禅モード",
+        "--query",
+        "-q",
+        help="Query to search in the Cosmos DB index",
+    ),
+    k: int = typer.Option(
+        5,
+        "--k",
+        "-k",
+        help="Number of results to return from the similarity search",
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        "-v",
+        help="Enable verbose output",
+    ),
+):
+    # CLI command: run a similarity search against Cosmos DB and log the hits.
+    # The hit count is logged at INFO; the hits themselves only at DEBUG,
+    # so they are visible when --verbose is given.
+    if verbose:
+        logger.setLevel(logging.DEBUG)
+
+    logger.info(f"Searching Cosmos DB with query: {query}")
+
+    # Query the vector store for the k nearest documents
+    hits = CosmosdbClientWrapper().similarity_search(query=query, k=k)
+    logger.info(f"Found {len(hits)} results for query: {query}")
+
+    # Dump each hit as indented JSON, preceded by a separator line
+    separator = "-" * 40
+    for rank, hit in enumerate(hits, start=1):
+        logger.debug(separator)
+        logger.debug(f"#{rank}: {hit.model_dump_json(indent=2)}")
+
+
+if __name__ == "__main__":
+    # Load .env (overriding variables that are already set) before
+    # handing control to the Typer application.
+    load_dotenv(override=True, verbose=True)
+    app()
diff --git a/scripts/test_all.sh b/scripts/test_all.sh
@@ -19,6 +19,11 @@ uv run python scripts/elasticsearch_operator.py create-index --index-name docs_k
 uv run python scripts/elasticsearch_operator.py add-documents --index-name docs_kabuto --verbose
 uv run python scripts/elasticsearch_operator.py search-documents --index-name docs_kabuto --query "禅モード" --verbose
 
+# Azure Cosmos DB NoSQL
+uv run python scripts/cosmosdb_operator.py --help
+uv run python scripts/cosmosdb_operator.py add-documents --verbose
+uv run python scripts/cosmosdb_operator.py similarity-search --query "禅モード" --k 3 --verbose
+
 # Agents
 
 ## Draw agent graph

diff --git a/template_langgraph/agents/chat_with_tools_agent/agent.py b/template_langgraph/agents/chat_with_tools_agent/agent.py
@@ -6,9 +6,7 @@
 from template_langgraph.agents.chat_with_tools_agent.models import AgentState
 from template_langgraph.llms.azure_openais import AzureOpenAiWrapper
 from template_langgraph.loggers import get_logger
-from template_langgraph.tools.dify_tool import run_dify_workflow
-from template_langgraph.tools.elasticsearch_tool import search_elasticsearch
-from template_langgraph.tools.qdrant_tool import search_qdrant
+from template_langgraph.tools.common import DEFAULT_TOOLS
 
 logger = get_logger(__name__)
 
@@ -40,11 +38,7 @@ def __call__(self, inputs: dict):
 class ChatWithToolsAgent:
     def __init__(self):
         self.llm = AzureOpenAiWrapper().chat_model
-        self.tools = [
-            run_dify_workflow,
-            search_qdrant,
-            search_elasticsearch,
-        ]
+        self.tools = DEFAULT_TOOLS
 
     def create_graph(self):
         """Create the main graph for the agent."""

diff --git a/template_langgraph/agents/kabuto_helpdesk_agent/agent.py b/template_langgraph/agents/kabuto_helpdesk_agent/agent.py
@@ -2,23 +2,15 @@
 
 from template_langgraph.llms.azure_openais import AzureOpenAiWrapper
 from template_langgraph.loggers import get_logger
-from template_langgraph.tools.dify_tool import run_dify_workflow
-from template_langgraph.tools.elasticsearch_tool import search_elasticsearch
-from template_langgraph.tools.qdrant_tool import search_qdrant
+from template_langgraph.tools.common import DEFAULT_TOOLS
 
 logger = get_logger(__name__)
 
 
 class KabutoHelpdeskAgent:
     def __init__(self, tools=None):
         if tools is None:
-            # Default tool for searching Qdrant
-            tools = [
-                run_dify_workflow,
-                search_qdrant,
-                search_elasticsearch,
-                # Add other tools as needed
-            ]
+            tools = DEFAULT_TOOLS
         self.agent = create_react_agent(
             model=AzureOpenAiWrapper().chat_model,
             tools=tools,

diff --git a/template_langgraph/tools/common.py b/template_langgraph/tools/common.py
@@ -0,0 +1,11 @@
+from template_langgraph.tools.cosmosdb_tool import search_cosmosdb
+from template_langgraph.tools.dify_tool import run_dify_workflow
+from template_langgraph.tools.elasticsearch_tool import search_elasticsearch
+from template_langgraph.tools.qdrant_tool import search_qdrant
+
+# Default tool set for agents that are not given an explicit tool list.
+# NOTE(review): this is one mutable list object shared by every importer;
+# copy it before mutating so other agents are not affected.
+DEFAULT_TOOLS = [
+    search_cosmosdb,
+    run_dify_workflow,
+    search_qdrant,
+    search_elasticsearch,
+]
diff --git a/template_langgraph/tools/cosmosdb_tool.py b/template_langgraph/tools/cosmosdb_tool.py
@@ -0,0 +1,160 @@
+from functools import lru_cache
+
+from azure.cosmos import CosmosClient, PartitionKey
+from langchain_community.vectorstores.azure_cosmos_db_no_sql import (
+    AzureCosmosDBNoSqlVectorSearch,
+)
+from langchain_core.documents import Document
+from langchain_core.tools import tool
+from pydantic import BaseModel, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from template_langgraph.llms.azure_openais import AzureOpenAiWrapper
+
+
+class Settings(BaseSettings):
+    """Cosmos DB connection settings, read from the environment or ``.env``."""
+
+    # Empty environment values are ignored and unrelated keys are tolerated
+    model_config = SettingsConfigDict(env_file=".env", env_ignore_empty=True, extra="ignore")
+
+    # Account endpoint and key; the defaults are placeholders only
+    cosmosdb_host: str = "<AZURE_COSMOS_DB_ENDPOINT>"
+    cosmosdb_key: str = "<AZURE_COSMOS_DB_KEY>"
+    # Database and container that hold the documents
+    cosmosdb_database_name: str = "template_langgraph"
+    cosmosdb_container_name: str = "kabuto"
+    # Partition key path used for the container
+    cosmosdb_partition_key: str = "/id"
+
+
+@lru_cache
+def get_cosmosdb_settings() -> Settings:
+    """Return the Cosmos DB settings, built on first call and cached afterwards."""
+    cosmosdb_settings = Settings()
+    return cosmosdb_settings
+
+
+class CosmosdbClientWrapper:
+    """Wrapper around ``AzureCosmosDBNoSqlVectorSearch`` configured for this project.
+
+    The constructor builds the vector store with a cosine/diskANN vector index
+    and a full-text index; the methods delegate to that store.
+    """
+
+    # Document paths shared by the embedding, indexing and full-text policies.
+    # Kept in one place because the policies must agree with each other.
+    _EMBEDDING_PATH = "/embedding"
+    _TEXT_PATH = "/text"
+    _TEXT_LANGUAGE = "en-US"
+    # NOTE(review): presumably must equal the output size of the configured
+    # embedding model - confirm when changing the model.
+    _EMBEDDING_DIMENSIONS = 1536
+
+    def __init__(
+        self,
+        settings: Settings | None = None,
+    ):
+        """Create the vector store.
+
+        Args:
+            settings: Cosmos DB settings to use. When omitted, the cached
+                settings from ``get_cosmosdb_settings()`` are used.
+        """
+        if settings is None:
+            settings = get_cosmosdb_settings()
+
+        # How the embedding vector stored on each item is described
+        vector_embedding_policy = {
+            "vectorEmbeddings": [
+                {
+                    "path": self._EMBEDDING_PATH,
+                    "dataType": "float32",
+                    "distanceFunction": "cosine",
+                    "dimensions": self._EMBEDDING_DIMENSIONS,
+                }
+            ]
+        }
+        # Index everything except _etag, plus the vector and full-text indexes
+        indexing_policy = {
+            "indexingMode": "consistent",
+            "includedPaths": [
+                {"path": "/*"},
+            ],
+            "excludedPaths": [
+                {"path": '/"_etag"/?'},
+            ],
+            "vectorIndexes": [
+                {"path": self._EMBEDDING_PATH, "type": "diskANN"},
+            ],
+            "fullTextIndexes": [
+                {"path": self._TEXT_PATH},
+            ],
+        }
+        full_text_policy = {
+            "defaultLanguage": self._TEXT_LANGUAGE,
+            "fullTextPaths": [
+                {
+                    "path": self._TEXT_PATH,
+                    "language": self._TEXT_LANGUAGE,
+                }
+            ],
+        }
+
+        self.vector_store = AzureCosmosDBNoSqlVectorSearch(
+            cosmos_client=CosmosClient(
+                url=settings.cosmosdb_host,
+                credential=settings.cosmosdb_key,
+            ),
+            embedding=AzureOpenAiWrapper().embedding_model,
+            vector_embedding_policy=vector_embedding_policy,
+            indexing_policy=indexing_policy,
+            cosmos_container_properties={
+                "partition_key": PartitionKey(path=settings.cosmosdb_partition_key),
+            },
+            cosmos_database_properties={},
+            full_text_policy=full_text_policy,
+            database_name=settings.cosmosdb_database_name,
+            container_name=settings.cosmosdb_container_name,
+        )
+
+    def add_documents(
+        self,
+        documents: list[Document],
+    ) -> list[str]:
+        """Add documents to a Cosmos DB container.
+
+        Args:
+            documents: Documents to store.
+
+        Returns:
+            Whatever the vector store's ``add_documents`` returns (annotated
+            as the list of stored document IDs).
+        """
+        return self.vector_store.add_documents(
+            documents=documents,
+        )
+
+    def delete_documents(
+        self,
+        ids: list[str],
+    ) -> bool | None:
+        """Delete documents from a Cosmos DB container.
+
+        Args:
+            ids: IDs of the documents to delete.
+
+        Returns:
+            The result of the vector store's ``delete`` call.
+        """
+        return self.vector_store.delete(
+            ids=ids,
+        )
+
+    def similarity_search(
+        self,
+        query: str,
+        k: int = 5,
+    ) -> list[Document]:
+        """Perform a similarity search in the Cosmos DB index.
+
+        Args:
+            query: Text to search for.
+            k: Number of results to return.
+
+        Returns:
+            The documents returned by the vector store for the query.
+        """
+        return self.vector_store.similarity_search(
+            query=query,
+            k=k,
+        )
+
+
+class CosmosdbInput(BaseModel):
+    # Argument schema for the search_cosmosdb tool.
+    # Documented with comments (not a docstring) to leave the schema untouched.
+    query: str = Field(default="禅モード", description="Query to search in the Cosmos DB index")
+    k: int = Field(default=5, description="Number of results to return from the similarity search")
+
+
+class CosmosdbOutput(BaseModel):
+    """One search hit: the document text and its ID (both required)."""
+
+    content: str = Field(..., description="Content of the document")
+    id: str = Field(..., description="ID of the document")
+
+
+@tool(args_schema=CosmosdbInput)
+def search_cosmosdb(query: str, k: int = 5) -> list[dict[str, str | None]]:
+    """Search for similar documents in CosmosDB vector store.
+
+    Args:
+        query: The search query string
+        k: Number of results to return (default: 5)
+
+    Returns:
+        A list with one dict per matching document, each holding the
+        document's "content" and "id"
+    """
+    documents = CosmosdbClientWrapper().similarity_search(query=query, k=k)
+    return [
+        {
+            "content": document.page_content,
+            # NOTE(review): the vector store appears to keep the item ID in
+            # metadata rather than on Document.id - fall back to it so "id"
+            # is not silently None. Confirm against the installed version.
+            "id": document.id or document.metadata.get("id"),
+        }
+        for document in documents
+    ]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -19,3 +19,4 @@

		- [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/)
		- [Qdrant](https://github.com/qdrant/qdrant)
		- [Azure Cosmos DB No SQL](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/)