Skip to content

Commit 33a59aa

Browse files
committed
add cosmosdb tool
1 parent 4c0eca3 commit 33a59aa

File tree

9 files changed

+310
-0
lines changed

9 files changed

+310
-0
lines changed

.env.template

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ ELASTICSEARCH_URL="http://localhost:9200"
3333
DIFY_API_URL="https://api.dify.ai/v1"
3434
DIFY_API_KEY="xxx"
3535

36+
## Cosmos DB Settings
37+
COSMOSDB_HOST="https://xxx.documents.azure.com:443/"
38+
COSMOSDB_KEY="xxx"
39+
COSMOSDB_DATABASE_NAME="langgraph"
40+
COSMOSDB_CONTAINER_NAME="docs_kabuto"
41+
COSMOSDB_PARTITION_KEY="/id"
42+
3643
# ---------
3744
# Utilities
3845
# ---------

docs/references.ja.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@
1919

2020
- [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/)
2121
- [Qdrant](https://github.com/qdrant/qdrant)
22+
- [Azure Cosmos DB NoSQL](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/)

docs/references.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@
1919

2020
- [CSVLoader](https://python.langchain.com/docs/how_to/document_loader_csv/)
2121
- [Qdrant](https://github.com/qdrant/qdrant)
22+
- [Azure Cosmos DB NoSQL](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ description = "A GitHub template repository for Python"
55
readme = "README.md"
66
requires-python = ">=3.10"
77
dependencies = [
8+
"azure-cosmos>=4.9.0",
89
"elasticsearch>=9.1.0",
910
"httpx>=0.28.1",
1011
"langchain-community>=0.3.27",

scripts/cosmosdb_operator.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import logging
2+
3+
import typer
4+
from dotenv import load_dotenv
5+
6+
from template_langgraph.loggers import get_logger
7+
from template_langgraph.tools.cosmosdb_tool import CosmosdbClientWrapper
8+
from template_langgraph.utilities.csv_loaders import CsvLoaderWrapper
9+
from template_langgraph.utilities.pdf_loaders import PdfLoaderWrapper
10+
11+
# Typer application that hosts the Cosmos DB operator commands
app = typer.Typer(add_completion=False, help="Cosmos DB operator CLI")

# Module-level logger used by the commands in this script
logger = get_logger(__name__)
19+
20+
21+
@app.command()
def add_documents(
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output"),
):
    # Load the PDF and CSV documents and store all of them in Cosmos DB.
    # NOTE: documented with comments rather than a docstring on purpose;
    # Typer would surface a docstring as this command's help text.

    # --verbose lowers the module logger's threshold to DEBUG
    if verbose:
        logger.setLevel(logging.DEBUG)

    # Gather the source documents: PDF first, then CSV
    pdf_documents = PdfLoaderWrapper().load_pdf_docs()
    logger.info(f"Loaded {len(pdf_documents)} documents from PDF.")
    csv_documents = CsvLoaderWrapper().load_csv_docs()
    logger.info(f"Loaded {len(csv_documents)} documents from CSV.")

    all_documents = pdf_documents + csv_documents
    logger.info(f"Total documents to add: {len(all_documents)}")

    # Push everything through the vector-store wrapper and report the returned IDs
    cosmosdb_client = CosmosdbClientWrapper()
    ids = cosmosdb_client.add_documents(documents=all_documents)
    logger.info(f"Added {len(ids)} documents to Cosmos DB.")
    for doc_id in ids:
        logger.debug(f"Added document ID: {doc_id}")

    # assert cosmosdb_client.delete_documents(ids=ids), "Failed to delete documents from Cosmos DB"
56+
57+
58+
@app.command()
def similarity_search(
    query: str = typer.Option(
        "禅モード", "--query", "-q", help="Query to search in the Cosmos DB index"
    ),
    k: int = typer.Option(
        5, "--k", "-k", help="Number of results to return from the similarity search"
    ),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output"),
):
    # Run a similarity search against Cosmos DB and log the hits.
    # NOTE: documented with comments rather than a docstring on purpose;
    # Typer would surface a docstring as this command's help text.

    # --verbose lowers the module logger's threshold to DEBUG
    if verbose:
        logger.setLevel(logging.DEBUG)

    logger.info(f"Searching Cosmos DB with query: {query}")

    # Ask the vector-store wrapper for the k closest documents
    hits = CosmosdbClientWrapper().similarity_search(query=query, k=k)
    logger.info(f"Found {len(hits)} results for query: {query}")

    # Dump every hit at DEBUG level, numbered from 1, with a separator line before each
    for rank, hit in enumerate(hits, start=1):
        logger.debug("-" * 40)
        logger.debug(f"#{rank}: {hit.model_dump_json(indent=2)}")
97+
98+
99+
if __name__ == "__main__":
    # Read the dotenv file (overriding already-set variables) before dispatching to the CLI
    load_dotenv(override=True, verbose=True)
    app()

scripts/test_all.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ uv run python scripts/elasticsearch_operator.py create-index --index-name docs_k
1919
uv run python scripts/elasticsearch_operator.py add-documents --index-name docs_kabuto --verbose
2020
uv run python scripts/elasticsearch_operator.py search-documents --index-name docs_kabuto --query "禅モード" --verbose
2121

22+
# Azure Cosmos DB NoSQL
23+
uv run python scripts/cosmosdb_operator.py --help
24+
uv run python scripts/cosmosdb_operator.py add-documents --verbose
25+
uv run python scripts/cosmosdb_operator.py similarity-search --query "禅モード" --k 3 --verbose
26+
2227
# Agents
2328

2429
## Draw agent graph

template_langgraph/agents/chat_with_tools_agent/agent.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from template_langgraph.agents.chat_with_tools_agent.models import AgentState
77
from template_langgraph.llms.azure_openais import AzureOpenAiWrapper
88
from template_langgraph.loggers import get_logger
9+
from template_langgraph.tools.cosmosdb_tool import search_cosmosdb
910
from template_langgraph.tools.dify_tool import run_dify_workflow
1011
from template_langgraph.tools.elasticsearch_tool import search_elasticsearch
1112
from template_langgraph.tools.qdrant_tool import search_qdrant
@@ -41,6 +42,7 @@ class ChatWithToolsAgent:
4142
def __init__(self):
4243
self.llm = AzureOpenAiWrapper().chat_model
4344
self.tools = [
45+
search_cosmosdb,
4446
run_dify_workflow,
4547
search_qdrant,
4648
search_elasticsearch,
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
from functools import lru_cache
2+
3+
from azure.cosmos import CosmosClient, PartitionKey
4+
from langchain_community.vectorstores.azure_cosmos_db_no_sql import (
5+
AzureCosmosDBNoSqlVectorSearch,
6+
)
7+
from langchain_core.documents import Document
8+
from langchain_core.tools import tool
9+
from pydantic import BaseModel, Field
10+
from pydantic_settings import BaseSettings, SettingsConfigDict
11+
12+
from template_langgraph.llms.azure_openais import AzureOpenAiWrapper
13+
14+
15+
class Settings(BaseSettings):
    # Connection and naming settings for the Cosmos DB vector store.
    # Every field carries a default; model_config points pydantic-settings at
    # a local ".env" file with env_ignore_empty=True and extra="ignore".

    # Account endpoint and key (placeholder defaults)
    cosmosdb_host: str = "<AZURE_COSMOS_DB_ENDPOINT>"
    cosmosdb_key: str = "<AZURE_COSMOS_DB_KEY>"

    # Database and container names
    cosmosdb_database_name: str = "template_langgraph"
    cosmosdb_container_name: str = "kabuto"

    # Partition key path for the container
    cosmosdb_partition_key: str = "/id"

    model_config = SettingsConfigDict(env_file=".env", env_ignore_empty=True, extra="ignore")
27+
28+
29+
@lru_cache
def get_cosmosdb_settings() -> Settings:
    """Return the Cosmos DB settings, building them on first use.

    The function takes no arguments, so ``lru_cache`` hands back the same
    ``Settings`` instance on every later call.
    """
    cosmosdb_settings = Settings()
    return cosmosdb_settings
33+
34+
35+
class CosmosdbClientWrapper:
    """Thin wrapper around ``AzureCosmosDBNoSqlVectorSearch``.

    The vector store is built once in ``__init__``; the public methods
    delegate straight to it.
    """

    def __init__(
        self,
        settings: Settings | None = None,
    ):
        """Create the Cosmos DB client and the vector store on top of it.

        Args:
            settings: Connection settings. When ``None``, the cached settings
                from ``get_cosmosdb_settings()`` are used.
        """
        if settings is None:
            settings = get_cosmosdb_settings()
        self.vector_store = AzureCosmosDBNoSqlVectorSearch(
            cosmos_client=CosmosClient(
                url=settings.cosmosdb_host,
                credential=settings.cosmosdb_key,
            ),
            embedding=AzureOpenAiWrapper().embedding_model,
            # Vectors are stored under "/embedding" and compared by cosine distance
            vector_embedding_policy={
                "vectorEmbeddings": [
                    {
                        "path": "/embedding",
                        "dataType": "float32",
                        "distanceFunction": "cosine",
                        # NOTE(review): presumably has to match the output size
                        # of the configured embedding model — confirm
                        "dimensions": 1536,
                    }
                ]
            },
            # Index everything except "_etag"; add a vector index on
            # "/embedding" and a full-text index on "/text"
            indexing_policy={
                "indexingMode": "consistent",
                "includedPaths": [
                    {"path": "/*"},
                ],
                "excludedPaths": [
                    {"path": '/"_etag"/?'},
                ],
                "vectorIndexes": [
                    {"path": "/embedding", "type": "diskANN"},
                ],
                "fullTextIndexes": [
                    {"path": "/text"},
                ],
            },
            cosmos_container_properties={
                "partition_key": PartitionKey(path=settings.cosmosdb_partition_key),
            },
            cosmos_database_properties={},
            # NOTE(review): the default query used by this module is Japanese;
            # confirm that "en-US" is the intended full-text language
            full_text_policy={
                "defaultLanguage": "en-US",
                "fullTextPaths": [
                    {
                        "path": "/text",
                        "language": "en-US",
                    }
                ],
            },
            database_name=settings.cosmosdb_database_name,
            container_name=settings.cosmosdb_container_name,
        )

    def add_documents(
        self,
        documents: list[Document],
    ) -> list[str]:
        """Add documents to a Cosmos DB container.

        Args:
            documents: Documents to store.

        Returns:
            The value returned by the vector store's ``add_documents``
            (annotated as a list of ID strings).
        """
        return self.vector_store.add_documents(
            documents=documents,
        )

    def delete_documents(
        self,
        ids: list[str],
    ) -> bool | None:
        """Delete documents from a Cosmos DB container.

        Args:
            ids: IDs of the documents to remove.

        Returns:
            Whatever the vector store's ``delete`` returns.
        """
        return self.vector_store.delete(
            ids=ids,
        )

    def similarity_search(
        self,
        query: str,
        k: int = 5,
    ) -> list[Document]:
        """Perform a similarity search in the Cosmos DB index.

        Args:
            query: Text to search for.
            k: Number of results to return.

        Returns:
            The documents returned by the vector store for ``query``.
        """
        return self.vector_store.similarity_search(
            query=query,
            k=k,
        )
118+
119+
120+
class CosmosdbInput(BaseModel):
    # Argument schema passed to @tool(args_schema=...) for search_cosmosdb.
    # Documented with comments rather than a docstring so the generated model
    # schema stays exactly as it was.

    # Free-text search query
    query: str = Field("禅モード", description="Query to search in the Cosmos DB index")
    # Number of hits to return
    k: int = Field(
        5, description="Number of results to return from the similarity search"
    )
129+
130+
131+
class CosmosdbOutput(BaseModel):
    # One search hit: the document text plus its identifier (both required).
    content: str = Field(..., description="Content of the document")
    id: str = Field(..., description="ID of the document")
134+
135+
136+
@tool(args_schema=CosmosdbInput)
def search_cosmosdb(query: str, k: int = 5) -> list[CosmosdbOutput]:
    """Search for similar documents in CosmosDB vector store.

    Args:
        query: The search query string
        k: Number of results to return (default: 5)

    Returns:
        CosmosdbOutput: A Pydantic model containing the search results
    """
    # NOTE(review): the docstring above is kept verbatim because @tool
    # presumably uses it as the tool description — confirm before rewording.
    # The function returns plain dicts with "content" and "id" keys, not
    # CosmosdbOutput instances, despite the return annotation.
    hits = CosmosdbClientWrapper().similarity_search(query=query, k=k)
    return [{"content": hit.page_content, "id": hit.id} for hit in hits]

uv.lock

Lines changed: 29 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)