Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions apps/3_call_azure_cosmos_db/.env.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Azure OpenAI Service
AZURE_OPENAI_ENDPOINT="https://<YOUR_AOAI_NAME>.openai.azure.com/"
AZURE_OPENAI_API_KEY="<YOUR_API_KEY>"
AZURE_OPENAI_API_VERSION="2024-10-21"
AZURE_OPENAI_EMBEDDING_MODEL="text-embedding-3-large"

# Azure Cosmos DB
AZURE_COSMOS_DB_CONNECTION_STRING="AccountEndpoint=https://<YOUR_COSMOSDB_NAME>.documents.azure.com:443/;AccountKey=<ACCOUNT_KEY>;"
AZURE_COSMOS_DB_DATABASE_NAME="workshop"
AZURE_COSMOS_DB_CONTAINER_NAME="chat"
21 changes: 21 additions & 0 deletions apps/3_call_azure_cosmos_db/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Setup

```shell
# Set up virtual environment
python -m venv .venv
source .venv/bin/activate

# Install dependencies
pip install typer python-dotenv azure-cosmos langchain-openai langchain-community
# pip install -r requirements.txt

python vector_database.py --help
```

# References

- [Azure Cosmos DB NoSQL](https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/)
- [Microsoft Learn: Azure Cosmos DB vector database](https://learn.microsoft.com/azure/cosmos-db/vector-database)
- [AzureDataRetrievalAugmentedGenerationSamples/Python/CosmosDB-NoSQL_VectorSearch](https://github.com/microsoft/AzureDataRetrievalAugmentedGenerationSamples/tree/main/Python/CosmosDB-NoSQL_VectorSearch)
- [Azure Cosmos DB ベクター検索機能と RAG の実装ガイド](https://note.com/generativeai_new/n/n3fcb2e57d195)
- [Azure CosmosDB for NoSQL でベクトル検索しよう!!](https://zenn.dev/nomhiro/articles/cosmos-nosql-vector-search)
Empty file.
109 changes: 44 additions & 65 deletions apps/3_call_azure_cosmos_db/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,46 @@
app = typer.Typer()


# https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/
def get_vector_embedding_policy(dimensions: int = 3072):
    """Return the vector embedding policy for the ``/embedding`` path.

    Args:
        dimensions: Length of the embedding vectors declared in the policy.
            Defaults to 3072, the size used for ``text-embedding-3-large``;
            pass another value when a different embedding model is configured
            so the policy matches the vectors actually produced.

    Returns:
        A new dict with a single ``vectorEmbeddings`` entry describing
        float32 vectors at ``/embedding`` compared with cosine distance.
    """
    return {
        "vectorEmbeddings": [
            {
                "path": "/embedding",
                "dataType": "float32",
                "distanceFunction": "cosine",
                "dimensions": dimensions,
            }
        ]
    }


def get_indexing_policy():
    """Return the container indexing policy, including the vector index.

    Returns:
        A new dict that indexes every path consistently, excludes the
        ``_etag`` path, and declares a ``quantizedFlat`` vector index on
        ``/embedding``.
    """
    policy = {"indexingMode": "consistent"}
    policy["includedPaths"] = [{"path": "/*"}]
    policy["excludedPaths"] = [{"path": '/"_etag"/?'}]
    # Vector index on the same path the embedding policy declares.
    policy["vectorIndexes"] = [{"path": "/embedding", "type": "quantizedFlat"}]
    return policy


def get_azure_cosmos_db_no_sql_vector_search():
    """Build the Azure Cosmos DB NoSQL vector store from environment settings.

    The Azure OpenAI embedding settings and the Cosmos DB connection string,
    database name and container name are all read from environment variables
    at call time.

    Returns:
        An ``AzureCosmosDBNoSqlVectorSearch`` configured with the embedding
        and indexing policies defined in this module and partitioned on
        ``/id``.

    Raises:
        ValueError: If any of the Cosmos DB environment variables is unset
            or empty.
    """
    # Fail fast with the names of the missing settings instead of passing
    # None into the Cosmos DB client and vector store.
    cosmos_settings = {
        name: getenv(name)
        for name in (
            "AZURE_COSMOS_DB_CONNECTION_STRING",
            "AZURE_COSMOS_DB_DATABASE_NAME",
            "AZURE_COSMOS_DB_CONTAINER_NAME",
        )
    }
    missing = [name for name, value in cosmos_settings.items() if not value]
    if missing:
        raise ValueError(f"missing required environment variables: {', '.join(missing)}")

    database_name = cosmos_settings["AZURE_COSMOS_DB_DATABASE_NAME"]
    embedding = AzureOpenAIEmbeddings(
        api_key=getenv("AZURE_OPENAI_API_KEY"),
        api_version=getenv("AZURE_OPENAI_API_VERSION"),
        azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
        model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
    )
    return AzureCosmosDBNoSqlVectorSearch(
        embedding=embedding,
        cosmos_client=CosmosClient.from_connection_string(cosmos_settings["AZURE_COSMOS_DB_CONNECTION_STRING"]),
        database_name=database_name,
        container_name=cosmos_settings["AZURE_COSMOS_DB_CONTAINER_NAME"],
        vector_embedding_policy=get_vector_embedding_policy(),
        indexing_policy=get_indexing_policy(),
        cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
        # The database properties must carry the database id as well.
        cosmos_database_properties={"id": database_name},
    )


@app.command()
def insert_data(
pdf_url: str = "https://arxiv.org/pdf/2303.08774.pdf",
Expand All @@ -36,38 +75,8 @@ def insert_data(
).split_documents(data)

try:
# Insert the data into Azure Cosmos DB
database_name = getenv("AZURE_COSMOS_DB_DATABASE_NAME")
AzureCosmosDBNoSqlVectorSearch.from_documents(
documents=docs,
embedding=AzureOpenAIEmbeddings(
api_key=getenv("AZURE_OPENAI_API_KEY"),
api_version=getenv("AZURE_OPENAI_API_VERSION"),
azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
),
cosmos_client=CosmosClient.from_connection_string(getenv("AZURE_COSMOS_DB_CONNECTION_STRING")),
database_name=database_name,
container_name=getenv("AZURE_COSMOS_DB_CONTAINER_NAME"),
vector_embedding_policy={
"vectorEmbeddings": [
{
"path": "/embedding",
"dataType": "float32",
"distanceFunction": "cosine",
"dimensions": 3072, # for text-embedding-3-large
}
]
},
indexing_policy={
"indexingMode": "consistent",
"includedPaths": [{"path": "/*"}],
"excludedPaths": [{"path": '/"_etag"/?'}],
"vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}],
},
cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
cosmos_database_properties={"id": database_name}, # need to add this
)
vector_search = get_azure_cosmos_db_no_sql_vector_search()
vector_search.add_documents(docs)
except Exception as e:
logger.error(f"error: {e}")

Expand All @@ -79,40 +88,10 @@ def query_data(
):
if verbose:
logging.basicConfig(level=logging.DEBUG)

database_name = getenv("AZURE_COSMOS_DB_DATABASE_NAME")
vector_search = AzureCosmosDBNoSqlVectorSearch(
embedding=AzureOpenAIEmbeddings(
api_key=getenv("AZURE_OPENAI_API_KEY"),
api_version=getenv("AZURE_OPENAI_API_VERSION"),
azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
),
cosmos_client=CosmosClient.from_connection_string(getenv("AZURE_COSMOS_DB_CONNECTION_STRING")),
database_name=database_name,
container_name=getenv("AZURE_COSMOS_DB_CONTAINER_NAME"),
vector_embedding_policy={
"vectorEmbeddings": [
{
"path": "/embedding",
"dataType": "float32",
"distanceFunction": "cosine",
"dimensions": 3072, # for text-embedding-3-large
}
]
},
indexing_policy={
"indexingMode": "consistent",
"includedPaths": [{"path": "/*"}],
"excludedPaths": [{"path": '/"_etag"/?'}],
"vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}],
},
cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
cosmos_database_properties={"id": database_name},
)

try:
vector_search = get_azure_cosmos_db_no_sql_vector_search()
results = vector_search.similarity_search(query=query)
logger.info(f"got {len(results)} results")
for idx, result in enumerate(results):
print(f"Result {idx + 1}: {result}")
except Exception as e:
Expand Down
89 changes: 58 additions & 31 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,33 +1,60 @@
openai==1.43.1
aiohappyeyeballs==2.4.3
aiohttp==3.11.7
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.2.post1
attrs==24.2.0
azure-core==1.32.0
azure-cosmos==4.9.0
certifi==2024.8.30
charset-normalizer==3.4.0
click==8.1.7
dataclasses-json==0.6.7
distro==1.9.0
frozenlist==1.5.0
h11==0.14.0
httpcore==1.0.7
httpx==0.27.2
httpx-sse==0.4.0
idna==3.10
jiter==0.7.1
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.3.8
langchain-community==0.3.8
langchain-core==0.3.21
langchain-openai==0.2.9
langchain-text-splitters==0.3.2
langsmith==0.1.145
markdown-it-py==3.0.0
marshmallow==3.23.1
mdurl==0.1.2
multidict==6.1.0
mypy-extensions==1.0.0
numpy==1.26.4
openai==1.55.0
orjson==3.10.12
packaging==24.2
propcache==0.2.0
pydantic==2.10.1
pydantic-settings==2.6.1
pydantic_core==2.27.1
Pygments==2.18.0
python-dotenv==1.0.1
streamlit==1.38.0
azure-cosmos==4.7.0
plotly==5.24.1
pandas==2.2.2
langchain==0.2.16
langchain-openai==0.1.25
langchain-community==0.2.12
azure-search-documents==11.5.1
azure-identity==1.18.0
azure-ai-documentintelligence==1.0.0b4
azure-storage-blob==12.23.1
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
promptflow==1.15.0
promptflow-evals==0.3.2
langgraph==0.2.23
langchain-chroma==0.1.4
beautifulsoup4==4.12.3
langgraph-checkpoint-sqlite==1.0.4
playwright==1.47.0
lxml==5.3.0
nest-asyncio==1.6.0
typer==0.12.5

# To run 99_streamlit_examples/pages/10_Object_Detection.py
# ultralytics==8.2.89

# To run 99_streamlit_examples/pages/11_Pose_Estimation.py
# mediapipe==0.10.14

# To run 99_streamlit_examples/pages/12_Video_processing.py
# opencv-python-headless==4.10.0.84
requests-toolbelt==1.0.0
rich==13.9.4
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
SQLAlchemy==2.0.35
tenacity==9.0.0
tiktoken==0.8.0
tqdm==4.67.0
typer==0.13.1
typing-inspect==0.9.0
typing_extensions==4.12.2
urllib3==2.2.3
yarl==1.18.0