Skip to content

Commit 2e30cc5

Browse files
author
ks6088ts
committed
Add Azure Cosmos DB integration and setup documentation
1 parent 1228a41 commit 2e30cc5

File tree

4 files changed

+70
-65
lines changed

4 files changed

+70
-65
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Azure OpenAI Service
2+
AZURE_OPENAI_ENDPOINT="https://<YOUR_AOAI_NAME>.openai.azure.com/"
3+
AZURE_OPENAI_API_KEY="<YOUR_API_KEY>"
4+
AZURE_OPENAI_API_VERSION="2024-10-21"
5+
AZURE_OPENAI_EMBEDDING_MODEL="text-embedding-3-large"
6+
7+
# Azure Cosmos DB
8+
AZURE_COSMOS_DB_CONNECTION_STRING="AccountEndpoint=https://<YOUR_COSMOSDB_NAME>.documents.azure.com:443/;AccountKey=<ACCOUNT_KEY>;"
9+
AZURE_COSMOS_DB_DATABASE_NAME="workshop"
10+
AZURE_COSMOS_DB_CONTAINER_NAME="chat"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Setup
2+
3+
```shell
4+
# Set up virtual environment
5+
python -m venv .venv
6+
source .venv/bin/activate
7+
8+
# Install dependencies
9+
pip install typer python-dotenv azure-cosmos langchain-openai langchain-community
10+
# pip install -r requirements.txt
11+
12+
python vector_database.py --help
13+
```
14+
15+
# References
16+
17+
- [Azure Cosmos DB for NoSQL (LangChain vector store integration)](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/)

apps/3_call_azure_cosmos_db/requirements.txt

Whitespace-only changes.

apps/3_call_azure_cosmos_db/vector_database.py

Lines changed: 43 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,45 @@
1414
app = typer.Typer()
1515

1616

17-
# https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/
17+
def get_vector_embedding_policy(dimensions: int = 3072) -> dict:
    """Build the vector embedding policy for the ``/embedding`` path.

    Args:
        dimensions: Length of the stored embedding vectors. The default, 3072,
            is the size used for ``text-embedding-3-large``; pass another
            value when a different embedding model is configured.

    Returns:
        A new policy dictionary with one ``vectorEmbeddings`` entry that
        declares ``/embedding`` as a float32 vector compared by cosine
        distance.

    Raises:
        ValueError: If ``dimensions`` is not a positive integer.
    """
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(dimensions, bool) or not isinstance(dimensions, int) or dimensions <= 0:
        raise ValueError(f"dimensions must be a positive integer, got {dimensions!r}")

    return {
        "vectorEmbeddings": [
            {
                "path": "/embedding",
                "dataType": "float32",
                "distanceFunction": "cosine",
                "dimensions": dimensions,
            }
        ]
    }
28+
29+
30+
def get_indexing_policy(vector_index_type: str = "quantizedFlat") -> dict:
    """Build the container indexing policy, including the vector index.

    Args:
        vector_index_type: Value written to the ``type`` field of the vector
            index on ``/embedding``. Defaults to ``"quantizedFlat"``; see the
            Azure Cosmos DB vector search documentation for the index types
            the service accepts.

    Returns:
        A new policy dictionary that indexes every path consistently,
        excludes ``_etag``, and declares one vector index on ``/embedding``.

    Raises:
        ValueError: If ``vector_index_type`` is not a non-empty string.
    """
    if not isinstance(vector_index_type, str) or not vector_index_type:
        raise ValueError(
            f"vector_index_type must be a non-empty string, got {vector_index_type!r}"
        )

    return {
        "indexingMode": "consistent",
        "includedPaths": [{"path": "/*"}],
        "excludedPaths": [{"path": '/"_etag"/?'}],
        # The path must match the one declared in the vector embedding policy.
        "vectorIndexes": [{"path": "/embedding", "type": vector_index_type}],
    }
37+
38+
39+
def _require_env(name: str) -> str:
    """Return the value of environment variable *name*; raise if unset or empty."""
    value = getenv(name)
    if not value:
        raise ValueError(f"environment variable {name} is not set")
    return value


def get_azure_cosmos_db_no_sql_vector_search():
    """Create the Cosmos DB NoSQL vector store configured from the environment.

    Reads the Azure OpenAI settings (``AZURE_OPENAI_API_KEY``,
    ``AZURE_OPENAI_API_VERSION``, ``AZURE_OPENAI_ENDPOINT``,
    ``AZURE_OPENAI_EMBEDDING_MODEL``) and the Cosmos DB settings
    (``AZURE_COSMOS_DB_CONNECTION_STRING``, ``AZURE_COSMOS_DB_DATABASE_NAME``,
    ``AZURE_COSMOS_DB_CONTAINER_NAME``).

    Returns:
        An ``AzureCosmosDBNoSqlVectorSearch`` built with the embedding and
        indexing policies defined in this module and ``/id`` as partition key.

    Raises:
        ValueError: If any of the three Cosmos DB environment variables is
            unset or empty.
    """
    # Resolve the Cosmos DB settings before building anything, so a missing
    # value is reported by name instead of as a None passed into the client.
    connection_string = _require_env("AZURE_COSMOS_DB_CONNECTION_STRING")
    database_name = _require_env("AZURE_COSMOS_DB_DATABASE_NAME")
    container_name = _require_env("AZURE_COSMOS_DB_CONTAINER_NAME")

    # The OpenAI settings are forwarded as-is (possibly None), as before.
    embedding = AzureOpenAIEmbeddings(
        api_key=getenv("AZURE_OPENAI_API_KEY"),
        api_version=getenv("AZURE_OPENAI_API_VERSION"),
        azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
        model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
    )

    return AzureCosmosDBNoSqlVectorSearch(
        embedding=embedding,
        cosmos_client=CosmosClient.from_connection_string(connection_string),
        database_name=database_name,
        container_name=container_name,
        vector_embedding_policy=get_vector_embedding_policy(),
        indexing_policy=get_indexing_policy(),
        cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
        # The database id is passed here as well as in database_name.
        cosmos_database_properties={"id": database_name},
    )
55+
1856
@app.command()
1957
def insert_data(
2058
pdf_url: str = "https://arxiv.org/pdf/2303.08774.pdf",
@@ -36,38 +74,8 @@ def insert_data(
3674
).split_documents(data)
3775

3876
try:
39-
# Insert the data into Azure Cosmos DB
40-
database_name = getenv("AZURE_COSMOS_DB_DATABASE_NAME")
41-
AzureCosmosDBNoSqlVectorSearch.from_documents(
42-
documents=docs,
43-
embedding=AzureOpenAIEmbeddings(
44-
api_key=getenv("AZURE_OPENAI_API_KEY"),
45-
api_version=getenv("AZURE_OPENAI_API_VERSION"),
46-
azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
47-
model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
48-
),
49-
cosmos_client=CosmosClient.from_connection_string(getenv("AZURE_COSMOS_DB_CONNECTION_STRING")),
50-
database_name=database_name,
51-
container_name=getenv("AZURE_COSMOS_DB_CONTAINER_NAME"),
52-
vector_embedding_policy={
53-
"vectorEmbeddings": [
54-
{
55-
"path": "/embedding",
56-
"dataType": "float32",
57-
"distanceFunction": "cosine",
58-
"dimensions": 3072, # for text-embedding-3-large
59-
}
60-
]
61-
},
62-
indexing_policy={
63-
"indexingMode": "consistent",
64-
"includedPaths": [{"path": "/*"}],
65-
"excludedPaths": [{"path": '/"_etag"/?'}],
66-
"vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}],
67-
},
68-
cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
69-
cosmos_database_properties={"id": database_name}, # need to add this
70-
)
77+
vector_search = get_azure_cosmos_db_no_sql_vector_search()
78+
vector_search.add_documents(docs)
7179
except Exception as e:
7280
logger.error(f"error: {e}")
7381

@@ -79,40 +87,10 @@ def query_data(
7987
):
8088
if verbose:
8189
logging.basicConfig(level=logging.DEBUG)
82-
83-
database_name = getenv("AZURE_COSMOS_DB_DATABASE_NAME")
84-
vector_search = AzureCosmosDBNoSqlVectorSearch(
85-
embedding=AzureOpenAIEmbeddings(
86-
api_key=getenv("AZURE_OPENAI_API_KEY"),
87-
api_version=getenv("AZURE_OPENAI_API_VERSION"),
88-
azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
89-
model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
90-
),
91-
cosmos_client=CosmosClient.from_connection_string(getenv("AZURE_COSMOS_DB_CONNECTION_STRING")),
92-
database_name=database_name,
93-
container_name=getenv("AZURE_COSMOS_DB_CONTAINER_NAME"),
94-
vector_embedding_policy={
95-
"vectorEmbeddings": [
96-
{
97-
"path": "/embedding",
98-
"dataType": "float32",
99-
"distanceFunction": "cosine",
100-
"dimensions": 3072, # for text-embedding-3-large
101-
}
102-
]
103-
},
104-
indexing_policy={
105-
"indexingMode": "consistent",
106-
"includedPaths": [{"path": "/*"}],
107-
"excludedPaths": [{"path": '/"_etag"/?'}],
108-
"vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}],
109-
},
110-
cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
111-
cosmos_database_properties={"id": database_name},
112-
)
113-
11490
try:
91+
vector_search = get_azure_cosmos_db_no_sql_vector_search()
11592
results = vector_search.similarity_search(query=query)
93+
logger.info(f"got {len(results)} results")
11694
for idx, result in enumerate(results):
11795
print(f"Result {idx + 1}: {result}")
11896
except Exception as e:

0 commit comments

Comments
 (0)