Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
"id": "db0855d0",
"metadata": {},
"source": [
"# Azure CosmosDB MongoDB Vector Store\n",
"In this notebook we are going to show how to use Azure Cosmosdb Mongodb vCore to perform vector searches in LlamaIndex. We will create the embedding using Azure Open AI. "
"# Azure DocumentDB Vector Store\n",
"In this notebook we are going to show how to use Azure DocumentDB to perform vector searches in LlamaIndex. We will create the embeddings using Azure OpenAI. "
]
},
{
Expand All @@ -33,7 +33,7 @@
"outputs": [],
"source": [
"%pip install llama-index-embeddings-openai\n",
"%pip install llama-index-vector-stores-azurecosmosmongo\n",
"%pip install llama-index-vector-stores-azuredocumentdb\n",
"%pip install llama-index-llms-azure-openai"
]
},
Expand Down Expand Up @@ -169,7 +169,7 @@
"metadata": {},
"source": [
"### Create the index\n",
"Here we establish the connection to an Azure Cosmosdb mongodb vCore cluster and create an vector search index."
"Here we establish the connection to an Azure DocumentDB cluster and create a vector search index."
]
},
{
Expand All @@ -180,16 +180,16 @@
"outputs": [],
"source": [
"import pymongo\n",
"from llama_index.vector_stores.azurecosmosmongo import (\n",
" AzureCosmosDBMongoDBVectorSearch,\n",
"from llama_index.vector_stores.azuredocumentdb import (\n",
" AzureDocumentDBVectorSearch,\n",
")\n",
"from llama_index.core import VectorStoreIndex\n",
"from llama_index.core import StorageContext\n",
"from llama_index.core import SimpleDirectoryReader\n",
"\n",
"connection_string = os.environ.get(\"AZURE_COSMOSDB_MONGODB_URI\")\n",
"connection_string = os.environ.get(\"AZURE_DOCUMENTDB_URI\")\n",
"mongodb_client = pymongo.MongoClient(connection_string)\n",
"store = AzureCosmosDBMongoDBVectorSearch(\n",
"store = AzureDocumentDBVectorSearch(\n",
" mongodb_client=mongodb_client,\n",
" db_name=\"demo_vectordb\",\n",
" collection_name=\"paul_graham_essay\",\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ as the storage backend for `VectorStoreIndex`.
- Astra DB (`AstraDBVectorStore`). [Quickstart](https://docs.datastax.com/en/astra/home/astra.html).
- AWS Document DB (`AWSDocDbVectorStore`). [Quickstart](https://docs.aws.amazon.com/documentdb/latest/developerguide/get-started-guide.html).
- Azure AI Search (`AzureAISearchVectorStore`). [Quickstart](https://learn.microsoft.com/en-us/azure/search/search-get-started-vector)
- Azure Cosmos DB Mongo vCore(`AzureCosmosDBMongoDBVectorSearch`). [Quickstart](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search?tabs=diskann)
- Azure Cosmos DB NoSql (`AzureCosmosDBNoSqlVectorSearch`). [Quickstart](https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/vector-search)
- Azure DocumentDB (`AzureDocumentDBVectorSearch`). [Quickstart](https://aka.ms/documentdb)
- Chroma (`ChromaVectorStore`) [Installation](https://docs.trychroma.com/getting-started)
- ClickHouse (`ClickHouseVectorStore`) [Installation](https://clickhouse.com/docs/en/install)
- Couchbase (`CouchbaseSearchVectorStore`) [Installation](https://www.couchbase.com/products/capella/)
Expand Down Expand Up @@ -243,21 +243,21 @@ vector_store = AzureAISearchVectorStore(
)
```

**Azure CosmosDB Mongo vCore**
**Azure DocumentDB**

```python
import pymongo
import os
from llama_index.vector_stores.azurecosmosmongo import (
AzureCosmosDBMongoDBVectorSearch,
from llama_index.vector_stores.azuredocumentdb import (
AzureDocumentDBVectorSearch,
)

# Set up the connection string with your Azure CosmosDB MongoDB URI
# Set up the connection string with your Azure DocumentDB URI
connection_string = os.getenv("YOUR_AZURE_DOCUMENTDB_URI")
mongodb_client = pymongo.MongoClient(connection_string)

# Create an instance of AzureCosmosDBMongoDBVectorSearch
vector_store = AzureCosmosDBMongoDBVectorSearch(
# Create an instance of AzureDocumentDBVectorSearch
vector_store = AzureDocumentDBVectorSearch(
mongodb_client=mongodb_client,
db_name="demo_vectordb",
collection_name="paul_graham_essay",
Expand Down
110 changes: 55 additions & 55 deletions docs/src/content/docs/framework/module_guides/storing/vector_stores.md

Large diffs are not rendered by default.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# LlamaIndex Chat_Store Integration: Azure DocumentDB Chat Store
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from llama_index.storage.chat_store.azuredocumentdb.base import (
AzureDocumentDBChatStore,
)

__all__ = ["AzureDocumentDBChatStore"]
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

logger = logging.getLogger(__name__)

APP_NAME = "Llama-Index-CDBMongoVCore-ChatStore-Python"
APP_NAME = "Llama-Index-AzureDocumentDB-ChatStore-Python"


# Convert a ChatMessage to a JSON object
Expand All @@ -29,8 +29,8 @@ def _dict_to_message(d: dict) -> ChatMessage:
return ChatMessage.model_validate(d)


class AzureCosmosMongoVCoreChatStore(BaseChatStore, ABC):
"""Creates an Azure Cosmos DB NoSql Chat Store."""
class AzureDocumentDBChatStore(BaseChatStore, ABC):
"""Creates an Azure DocumentDB Chat Store."""

_mongo_client = MongoClient
_database = Database
Expand Down Expand Up @@ -67,7 +67,7 @@ def from_connection_string(
db_name: Optional[str] = None,
collection_name: Optional[str] = None,
):
"""Creates an instance of AzureCosmosMongoVCoreChatStore using a connection string."""
"""Creates an instance of AzureDocumentDBChatStore using a connection string."""
# Parse the MongoDB URI
parsed_uri = urllib.parse.urlparse(connection_string)
# Extract username and password, and perform url_encoding
Expand All @@ -90,8 +90,8 @@ def from_host_and_port(
port: int,
db_name: Optional[str] = None,
collection_name: Optional[str] = None,
) -> "AzureCosmosMongoVCoreChatStore":
"""Initializes AzureCosmosMongoVCoreChatStore from an endpoint url and key."""
) -> "AzureDocumentDBChatStore":
"""Initializes AzureDocumentDBChatStore from a host and port."""
mongo_client = MongoClient(host=host, port=port, appname=APP_NAME)

return cls(
Expand Down Expand Up @@ -162,4 +162,4 @@ def get_keys(self) -> List[str]:
@classmethod
def class_name(cls) -> str:
"""Get class name."""
return "AzureCosmosMongoVCoreChatStore"
return "AzureDocumentDBChatStore"
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ dev = [
]

[project]
name = "llama-index-storage-chat-store-azurecosmosmongovcore"
name = "llama-index-storage-chat-store-azuredocumentdb"
version = "1.2.1"
description = "llama-index storage-chat-store azure cosmosdb mongo vcore integration"
description = "llama-index storage-chat-store Azure DocumentDB integration"
authors = [{name = "Aayush Kataria", email = "[email protected]"}]
requires-python = ">=3.9,<4.0"
readme = "README.md"
Expand All @@ -52,7 +52,7 @@ exclude = ["**/BUILD"]

[tool.llamahub]
contains_example = false
import_path = "llama_index.storage.chat_store.azurecosmosmongovcore"
import_path = "llama_index.storage.chat_store.azuredocumentdb"

[tool.llamahub.class_authors]
AzureDocumentDBChatStore = "Aayush"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from llama_index.core.storage.chat_store.base import BaseChatStore
from llama_index.storage.chat_store.azuredocumentdb import (
AzureDocumentDBChatStore,
)


def test_class():
    """Check that AzureDocumentDBChatStore has BaseChatStore among its MRO ancestors."""
    # Compare by class name rather than identity, mirroring the other
    # integration smoke tests in this repository.
    expected_base = BaseChatStore.__name__
    ancestor_names = []
    for klass in AzureDocumentDBChatStore.__mro__:
        ancestor_names.append(klass.__name__)
    assert expected_base in ancestor_names

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# LlamaIndex Vector_Stores Integration: Azure DocumentDB
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from llama_index.vector_stores.azuredocumentdb.base import (
AzureDocumentDBVectorSearch,
)

__all__ = ["AzureDocumentDBVectorSearch"]
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Azure CosmosDB MongoDB vCore Vector store index.
Azure DocumentDB Vector store index.

An index that is built on top of an existing vector store.

Expand All @@ -26,27 +26,27 @@
logger = logging.getLogger(__name__)


class AzureCosmosDBMongoDBVectorSearch(BasePydanticVectorStore):
class AzureDocumentDBVectorSearch(BasePydanticVectorStore):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can change the class names, but we should also just export an alias to make it non-breaking for users? Or is this worth a breaking change over?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@logan-markewich thanks for the comment. Let me discuss this with the PMs; I will get back to you if we have any updates.

"""
Azure CosmosDB MongoDB vCore Vector Store.
Azure DocumentDB Vector Store.

To use, you should have both:
- the ``pymongo`` python package installed
- a connection string associated with an Azure Cosmodb MongoDB vCore Cluster
- a connection string associated with an Azure DocumentDB cluster

Examples:
`pip install llama-index-vector-stores-azurecosmosmongo`
`pip install llama-index-vector-stores-azuredocumentdb`

```python
import pymongo
from llama_index.vector_stores.azurecosmosmongo import AzureCosmosDBMongoDBVectorSearch
from llama_index.vector_stores.azuredocumentdb import AzureDocumentDBVectorSearch

# Set up the connection string with your Azure CosmosDB MongoDB URI
connection_string = "YOUR_AZURE_COSMOSDB_MONGODB_URI"
# Set up the connection string with your Azure DocumentDB URI
connection_string = "YOUR_AZURE_DOCUMENTDB_URI"
mongodb_client = pymongo.MongoClient(connection_string)

# Create an instance of AzureCosmosDBMongoDBVectorSearch
vector_store = AzureCosmosDBMongoDBVectorSearch(
# Create an instance of AzureDocumentDBVectorSearch
vector_store = AzureDocumentDBVectorSearch(
mongodb_client=mongodb_client,
db_name="demo_vectordb",
collection_name="paul_graham_essay",
Expand Down Expand Up @@ -88,17 +88,17 @@ def __init__(
Initialize the vector store.

Args:
mongodb_client: An Azure CosmoDB MongoDB client (type: MongoClient, shown any for lazy import).
db_name: An Azure CosmosDB MongoDB database name.
collection_name: An Azure CosmosDB collection name.
index_name: An Azure CosmosDB MongoDB vCore Vector Search index name.
mongodb_client: An Azure DocumentDB client (type: MongoClient, shown as Any for lazy import).
db_name: An Azure DocumentDB database name.
collection_name: An Azure DocumentDB collection name.
index_name: An Azure DocumentDB Vector Search index name.
id_key: The data field to use as the id.
embedding_key: An Azure CosmosDB MongoDB field that will contain
embedding_key: An Azure DocumentDB field that will contain
the embedding for each document.
text_key: An Azure CosmosDB MongoDB field that will contain the text for each document.
metadata_key: An Azure CosmosDB MongoDB field that will contain
text_key: An Azure DocumentDB field that will contain the text for each document.
metadata_key: An Azure DocumentDB field that will contain
the metadata for each document.
cosmos_search_kwargs: An Azure CosmosDB MongoDB field that will
cosmos_search_kwargs: An Azure DocumentDB field that will
contain search options, such as kind, numLists, similarity, and dimensions.
insert_kwargs: The kwargs used during `insert`.

Expand All @@ -108,13 +108,13 @@ def __init__(
if mongodb_client is not None:
self._mongodb_client = cast(pymongo.MongoClient, mongodb_client)
else:
if "AZURE_COSMOSDB_MONGODB_URI" not in os.environ:
if "AZURE_DOCUMENTDB_URI" not in os.environ:
raise ValueError(
"Must specify Azure cosmodb 'AZURE_COSMOSDB_MONGODB_URI' via env variable "
"Must specify Azure DocumentDB 'AZURE_DOCUMENTDB_URI' via env variable "
"if not directly passing in client."
)
self._mongodb_client = pymongo.MongoClient(
os.environ["AZURE_COSMOSDB_MONGODB_URI"],
os.environ["AZURE_DOCUMENTDB_URI"],
appname="LLAMAINDEX_PYTHON",
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ dev = [
]

[project]
name = "llama-index-vector-stores-azurecosmosmongo"
name = "llama-index-vector-stores-azuredocumentdb"
version = "0.7.1"
description = "llama-index vector_stores azurecosmosmongo integration"
description = "llama-index vector_stores azuredocumentdb integration"
authors = [{name = "Aayush Kataria", email = "[email protected]"}]
requires-python = ">=3.9,<4.0"
readme = "README.md"
Expand All @@ -52,10 +52,10 @@ exclude = ["**/BUILD"]

[tool.llamahub]
contains_example = false
import_path = "llama_index.vector_stores.azurecosmosmongo"
import_path = "llama_index.vector_stores.azuredocumentdb"

[tool.llamahub.class_authors]
AzureCosmosDBMongoDBVectorSearch = "llama-index"
AzureDocumentDBVectorSearch = "llama-index"

[tool.mypy]
disallow_untyped_defs = true
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Test Azue CosmosDB MongoDB vCore Vector Search functionality."""
"""Test Azure DocumentDB Vector Search functionality."""

from __future__ import annotations

Expand All @@ -13,7 +13,7 @@

INDEX_NAME = "llamaindex-test-index"
NAMESPACE = "llamaindex_test_db.llamaindex_test_collection"
CONNECTION_STRING = os.environ.get("AZURE_COSMOSDB_MONGODB_URI")
CONNECTION_STRING = os.environ.get("AZURE_DOCUMENTDB_URI")
DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
test_client = MongoClient(CONNECTION_STRING) # type: ignore
collection = test_client[DB_NAME][COLLECTION_NAME]
Expand All @@ -24,7 +24,7 @@

from llama_index.core.schema import NodeRelationship, RelatedNodeInfo, TextNode
from llama_index.core.vector_stores.types import VectorStoreQuery
from llama_index.vector_stores.azurecosmosmongo import AzureCosmosDBMongoDBVectorSearch
from llama_index.vector_stores.azuredocumentdb import AzureDocumentDBVectorSearch


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -63,8 +63,8 @@ def node_embeddings() -> list[TextNode]:


@pytest.mark.skipif(not pymongo_available, reason="pymongo is not available")
@pytest.mark.skip(reason="Need to manually provide a valid Azure CosmosDB MongoDB URI")
class TestAzureMongovCoreVectorSearch:
@pytest.mark.skip(reason="Need to manually provide a valid Azure DocumentDB URI")
class TestAzureDocumentDBVectorSearch:
@classmethod
def setup_class(cls) -> None:
# ensure the test collection is empty
Expand All @@ -81,14 +81,14 @@ def setup(self) -> None:
collection.delete_many({}) # type: ignore[index]

def test_add_and_delete(self) -> None:
vector_store = AzureCosmosDBMongoDBVectorSearch(
vector_store = AzureDocumentDBVectorSearch(
mongodb_client=test_client, # type: ignore
db_name=DB_NAME,
collection_name=COLLECTION_NAME,
index_name=INDEX_NAME,
cosmos_search_kwargs={"dimensions": 3},
)
sleep(1) # waits for azure cosmosdb mongodb to update
sleep(1) # waits for Azure DocumentDB to update
vector_store.add(
[
TextNode(
Expand All @@ -109,15 +109,15 @@ def test_add_and_delete(self) -> None:
assert collection.count_documents({}) == 0

def test_query(self, node_embeddings: List[TextNode]) -> None:
vector_store = AzureCosmosDBMongoDBVectorSearch(
vector_store = AzureDocumentDBVectorSearch(
mongodb_client=test_client, # type: ignore
db_name=DB_NAME,
collection_name=COLLECTION_NAME,
index_name=INDEX_NAME,
cosmos_search_kwargs={"dimensions": 3},
)
vector_store.add(node_embeddings) # type: ignore
sleep(1) # wait for azure cosmodb mongodb to update the index
sleep(1) # wait for Azure DocumentDB to update the index

res = vector_store.query(
VectorStoreQuery(query_embedding=[1.0, 0.0, 0.0], similarity_top_k=1)
Expand Down
Loading