Skip to content

Commit 27c20fe

Browse files
authored
Merge pull request #176 from ks6088ts-labs/feature/issue-174_update-codes
Update Azure Cosmos DB integration and setup documentation
2 parents 1228a41 + 3cff04a commit 27c20fe

File tree

5 files changed

+133
-96
lines changed

5 files changed

+133
-96
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Azure OpenAI Service
2+
AZURE_OPENAI_ENDPOINT="https://<YOUR_AOAI_NAME>.openai.azure.com/"
3+
AZURE_OPENAI_API_KEY="<YOUR_API_KEY>"
4+
AZURE_OPENAI_API_VERSION="2024-10-21"
5+
AZURE_OPENAI_EMBEDDING_MODEL="text-embedding-3-large"
6+
7+
# Azure Cosmos DB
8+
AZURE_COSMOS_DB_CONNECTION_STRING="AccountEndpoint=https://<YOUR_COSMOSDB_NAME>.documents.azure.com:443/;AccountKey=<ACCOUNT_KEY>;"
9+
AZURE_COSMOS_DB_DATABASE_NAME="workshop"
10+
AZURE_COSMOS_DB_CONTAINER_NAME="chat"
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Setup
2+
3+
```shell
4+
# Set up virtual environment
5+
python -m venv .venv
6+
source .venv/bin/activate
7+
8+
# Install dependencies
9+
pip install typer python-dotenv azure-cosmos langchain-openai langchain-community
10+
# pip install -r requirements.txt
11+
12+
python vector_database.py --help
13+
```
14+
15+
# References
16+
17+
- [LangChain: Azure Cosmos DB NoSQL vector store](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/)
18+
- [Microsoft Learn: Azure Cosmos DB vector database](https://learn.microsoft.com/azure/cosmos-db/vector-database)
19+
- [AzureDataRetrievalAugmentedGenerationSamples/Python/CosmosDB-NoSQL_VectorSearch](https://github.com/microsoft/AzureDataRetrievalAugmentedGenerationSamples/tree/main/Python/CosmosDB-NoSQL_VectorSearch)
20+
- [Azure Cosmos DB ベクター検索機能と RAG の実装ガイド](https://note.com/generativeai_new/n/n3fcb2e57d195)
21+
- [Azure CosmosDB for NoSQL でベクトル検索しよう!!](https://zenn.dev/nomhiro/articles/cosmos-nosql-vector-search)

apps/3_call_azure_cosmos_db/requirements.txt

Whitespace-only changes.

apps/3_call_azure_cosmos_db/vector_database.py

Lines changed: 44 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,46 @@
1414
app = typer.Typer()
1515

1616

17-
# https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/
17+
def get_vector_embedding_policy(
    dimensions: int = 3072,
    path: str = "/embedding",
    data_type: str = "float32",
    distance_function: str = "cosine",
) -> dict:
    """Return the Azure Cosmos DB vector embedding policy for the container.

    The defaults reproduce the policy this module has always used, which is
    sized for ``text-embedding-3-large``. Pass a different ``dimensions``
    value when ``AZURE_OPENAI_EMBEDDING_MODEL`` points at a model with another
    output size (e.g. 1536 for ``text-embedding-3-small``).

    Args:
        dimensions: Length of each embedding vector stored at ``path``.
        path: JSON path of the document property that holds the vector.
        data_type: Element type of the stored vector.
        distance_function: Similarity metric used for vector search.

    Returns:
        A new policy dict on every call, so callers may mutate it safely.
    """
    return {
        "vectorEmbeddings": [
            {
                "path": path,
                "dataType": data_type,
                "distanceFunction": distance_function,
                "dimensions": dimensions,  # 3072 matches text-embedding-3-large
            }
        ]
    }
28+
29+
30+
def get_indexing_policy(
    vector_path: str = "/embedding",
    vector_index_type: str = "quantizedFlat",
) -> dict:
    """Return the Azure Cosmos DB indexing policy, including the vector index.

    All document paths are indexed except the system ``_etag`` property, and
    one vector index is declared on ``vector_path``. The defaults reproduce
    the policy this module has always used.

    Args:
        vector_path: JSON path of the vector property to index. It should be
            the same path that the vector embedding policy declares.
        vector_index_type: Vector index kind, e.g. ``"flat"``,
            ``"quantizedFlat"`` or ``"diskANN"``.

    Returns:
        A new policy dict on every call, so callers may mutate it safely.
    """
    return {
        "indexingMode": "consistent",
        "includedPaths": [{"path": "/*"}],
        "excludedPaths": [{"path": '/"_etag"/?'}],
        "vectorIndexes": [{"path": vector_path, "type": vector_index_type}],
    }
37+
38+
39+
def get_azure_cosmos_db_no_sql_vector_search():
    """Build the Azure Cosmos DB NoSQL vector store from environment settings.

    Reads the ``AZURE_OPENAI_*`` variables to configure the embedding client
    and the ``AZURE_COSMOS_DB_*`` variables for the connection string,
    database name and container name. The container is partitioned on ``/id``
    and uses the policies returned by ``get_vector_embedding_policy`` and
    ``get_indexing_policy``.

    Returns:
        A configured ``AzureCosmosDBNoSqlVectorSearch`` instance.
    """
    embeddings = AzureOpenAIEmbeddings(
        api_key=getenv("AZURE_OPENAI_API_KEY"),
        api_version=getenv("AZURE_OPENAI_API_VERSION"),
        azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
        model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
    )
    client = CosmosClient.from_connection_string(getenv("AZURE_COSMOS_DB_CONNECTION_STRING"))

    # The same database name is used for both the store and the database properties.
    db_name = getenv("AZURE_COSMOS_DB_DATABASE_NAME")
    container = getenv("AZURE_COSMOS_DB_CONTAINER_NAME")

    return AzureCosmosDBNoSqlVectorSearch(
        embedding=embeddings,
        cosmos_client=client,
        database_name=db_name,
        container_name=container,
        vector_embedding_policy=get_vector_embedding_policy(),
        indexing_policy=get_indexing_policy(),
        cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
        cosmos_database_properties={"id": db_name},
    )
55+
56+
1857
@app.command()
1958
def insert_data(
2059
pdf_url: str = "https://arxiv.org/pdf/2303.08774.pdf",
@@ -36,38 +75,8 @@ def insert_data(
3675
).split_documents(data)
3776

3877
try:
39-
# Insert the data into Azure Cosmos DB
40-
database_name = getenv("AZURE_COSMOS_DB_DATABASE_NAME")
41-
AzureCosmosDBNoSqlVectorSearch.from_documents(
42-
documents=docs,
43-
embedding=AzureOpenAIEmbeddings(
44-
api_key=getenv("AZURE_OPENAI_API_KEY"),
45-
api_version=getenv("AZURE_OPENAI_API_VERSION"),
46-
azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
47-
model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
48-
),
49-
cosmos_client=CosmosClient.from_connection_string(getenv("AZURE_COSMOS_DB_CONNECTION_STRING")),
50-
database_name=database_name,
51-
container_name=getenv("AZURE_COSMOS_DB_CONTAINER_NAME"),
52-
vector_embedding_policy={
53-
"vectorEmbeddings": [
54-
{
55-
"path": "/embedding",
56-
"dataType": "float32",
57-
"distanceFunction": "cosine",
58-
"dimensions": 3072, # for text-embedding-3-large
59-
}
60-
]
61-
},
62-
indexing_policy={
63-
"indexingMode": "consistent",
64-
"includedPaths": [{"path": "/*"}],
65-
"excludedPaths": [{"path": '/"_etag"/?'}],
66-
"vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}],
67-
},
68-
cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
69-
cosmos_database_properties={"id": database_name}, # need to add this
70-
)
78+
vector_search = get_azure_cosmos_db_no_sql_vector_search()
79+
vector_search.add_documents(docs)
7180
except Exception as e:
7281
logger.error(f"error: {e}")
7382

@@ -79,40 +88,10 @@ def query_data(
7988
):
8089
if verbose:
8190
logging.basicConfig(level=logging.DEBUG)
82-
83-
database_name = getenv("AZURE_COSMOS_DB_DATABASE_NAME")
84-
vector_search = AzureCosmosDBNoSqlVectorSearch(
85-
embedding=AzureOpenAIEmbeddings(
86-
api_key=getenv("AZURE_OPENAI_API_KEY"),
87-
api_version=getenv("AZURE_OPENAI_API_VERSION"),
88-
azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
89-
model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
90-
),
91-
cosmos_client=CosmosClient.from_connection_string(getenv("AZURE_COSMOS_DB_CONNECTION_STRING")),
92-
database_name=database_name,
93-
container_name=getenv("AZURE_COSMOS_DB_CONTAINER_NAME"),
94-
vector_embedding_policy={
95-
"vectorEmbeddings": [
96-
{
97-
"path": "/embedding",
98-
"dataType": "float32",
99-
"distanceFunction": "cosine",
100-
"dimensions": 3072, # for text-embedding-3-large
101-
}
102-
]
103-
},
104-
indexing_policy={
105-
"indexingMode": "consistent",
106-
"includedPaths": [{"path": "/*"}],
107-
"excludedPaths": [{"path": '/"_etag"/?'}],
108-
"vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}],
109-
},
110-
cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
111-
cosmos_database_properties={"id": database_name},
112-
)
113-
11491
try:
92+
vector_search = get_azure_cosmos_db_no_sql_vector_search()
11593
results = vector_search.similarity_search(query=query)
94+
logger.info(f"got {len(results)} results")
11695
for idx, result in enumerate(results):
11796
print(f"Result {idx + 1}: {result}")
11897
except Exception as e:

requirements.txt

Lines changed: 58 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,60 @@
1-
openai==1.43.1
1+
aiohappyeyeballs==2.4.3
2+
aiohttp==3.11.7
3+
aiosignal==1.3.1
4+
annotated-types==0.7.0
5+
anyio==4.6.2.post1
6+
attrs==24.2.0
7+
azure-core==1.32.0
8+
azure-cosmos==4.9.0
9+
certifi==2024.8.30
10+
charset-normalizer==3.4.0
11+
click==8.1.7
12+
dataclasses-json==0.6.7
13+
distro==1.9.0
14+
frozenlist==1.5.0
15+
h11==0.14.0
16+
httpcore==1.0.7
17+
httpx==0.27.2
18+
httpx-sse==0.4.0
19+
idna==3.10
20+
jiter==0.7.1
21+
jsonpatch==1.33
22+
jsonpointer==3.0.0
23+
langchain==0.3.8
24+
langchain-community==0.3.8
25+
langchain-core==0.3.21
26+
langchain-openai==0.2.9
27+
langchain-text-splitters==0.3.2
28+
langsmith==0.1.145
29+
markdown-it-py==3.0.0
30+
marshmallow==3.23.1
31+
mdurl==0.1.2
32+
multidict==6.1.0
33+
mypy-extensions==1.0.0
34+
numpy==1.26.4
35+
openai==1.55.0
36+
orjson==3.10.12
37+
packaging==24.2
38+
propcache==0.2.0
39+
pydantic==2.10.1
40+
pydantic-settings==2.6.1
41+
pydantic_core==2.27.1
42+
Pygments==2.18.0
243
python-dotenv==1.0.1
3-
streamlit==1.38.0
4-
azure-cosmos==4.7.0
5-
plotly==5.24.1
6-
pandas==2.2.2
7-
langchain==0.2.16
8-
langchain-openai==0.1.25
9-
langchain-community==0.2.12
10-
azure-search-documents==11.5.1
11-
azure-identity==1.18.0
12-
azure-ai-documentintelligence==1.0.0b4
13-
azure-storage-blob==12.23.1
44+
PyYAML==6.0.2
45+
regex==2024.11.6
1446
requests==2.32.3
15-
promptflow==1.15.0
16-
promptflow-evals==0.3.2
17-
langgraph==0.2.23
18-
langchain-chroma==0.1.4
19-
beautifulsoup4==4.12.3
20-
langgraph-checkpoint-sqlite==1.0.4
21-
playwright==1.47.0
22-
lxml==5.3.0
23-
nest-asyncio==1.6.0
24-
typer==0.12.5
25-
26-
# To run 99_streamlit_examples/pages/10_Object_Detection.py
27-
# ultralytics==8.2.89
28-
29-
# To run 99_streamlit_examples/pages/11_Pose_Estimation.py
30-
# mediapipe==0.10.14
31-
32-
# To run 99_streamlit_examples/pages/12_Video_processing.py
33-
# opencv-python-headless==4.10.0.84
47+
requests-toolbelt==1.0.0
48+
rich==13.9.4
49+
shellingham==1.5.4
50+
six==1.16.0
51+
sniffio==1.3.1
52+
SQLAlchemy==2.0.35
53+
tenacity==9.0.0
54+
tiktoken==0.8.0
55+
tqdm==4.67.0
56+
typer==0.13.1
57+
typing-inspect==0.9.0
58+
typing_extensions==4.12.2
59+
urllib3==2.2.3
60+
yarl==1.18.0

0 commit comments

Comments
 (0)