Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,14 @@ AZURE_OPENAI_API_KEY="<YOUR_API_KEY>"
AZURE_OPENAI_API_VERSION="2024-06-01"
AZURE_OPENAI_GPT_MODEL="gpt-4o"
AZURE_OPENAI_STT_MODEL="whisper"
AZURE_OPENAI_EMBEDDING_MODEL="text-embedding-3-large"

# Azure Cosmos DB
AZURE_COSMOS_DB_CONNECTION_STRING="AccountEndpoint=https://<YOUR_COSMOSDB_NAME>.documents.azure.com:443/;AccountKey=<ACCOUNT_KEY>;"
AZURE_COSMOS_DB_DATABASE_NAME="workshop"
AZURE_COSMOS_DB_CONTAINER_NAME="chat"

# Azure AI Search
AZURE_AI_SEARCH_ENDPOINT="https://<YOUR_AZURE_SEARCH_NAME>.search.windows.net/"
AZURE_AI_SEARCH_API_KEY="<YOUR_API_KEY>"
AZURE_AI_SEARCH_INDEX_NAME="chat"
41 changes: 41 additions & 0 deletions apps/6_call_azure_ai_search/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Azure AI Search を Python から呼び出す

Azure AI Search を Python から呼び出す方法を説明します。

## 前提条件

- Python 3.11+ がインストールされていること
- Azure AI Search が利用できること
- Azure AI Search の API キーが取得できていること

## 手順

1. Azure AI Search の API キーを取得する
1. [.env.template](../../.env.template) をコピーして `.env` ファイルを作成する
1. `.env` ファイルに API キーを設定する

```shell
# 仮想環境を作成する
python -m venv .venv

# 仮想環境を有効化する
source .venv/bin/activate

# ライブラリをインストールする
pip install -r requirements.txt
```

### 実行例

```shell
# Azure AI Search にインデックスを作成して、ドキュメントを追加する
python apps/6_call_azure_ai_search/create_index.py

# Azure AI Search にクエリを発行して、検索結果を取得する
python apps/6_call_azure_ai_search/search_index.py
```

## 参考資料

- [How to recursively split text by characters](https://python.langchain.com/v0.2/docs/how_to/recursive_text_splitter/)
- [青空文庫 > 吾輩は猫である](https://www.aozora.gr.jp/cards/000148/files/789_14547.html)
57 changes: 57 additions & 0 deletions apps/6_call_azure_ai_search/create_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from os import getenv
from pprint import pprint

from datasets import load_texts
from dotenv import load_dotenv
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_openai import AzureOpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

if __name__ == "__main__":
    # Embed text with Azure OpenAI Service and index it into Azure AI Search.
    load_dotenv()

    # Fetch the raw texts that will be indexed.
    raw_texts = load_texts(file_path="./apps/6_call_azure_ai_search/data/test.txt")

    # Split the texts into chunks.
    # https://python.langchain.com/v0.2/docs/how_to/recursive_text_splitter/
    splitter_options = {
        "chunk_size": 400,
        "chunk_overlap": 20,
        "length_function": len,
        "is_separator_regex": False,
    }
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    chunks = chunker.create_documents(texts=raw_texts)
    pprint(chunks)

    # Embedding client; every setting is read from the environment.
    embedding_options = {
        "api_key": getenv("AZURE_OPENAI_API_KEY"),
        "api_version": getenv("AZURE_OPENAI_API_VERSION"),
        "azure_endpoint": getenv("AZURE_OPENAI_ENDPOINT"),
        "model": getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
    }
    embedder = AzureOpenAIEmbeddings(**embedding_options)

    # Indexing with Azure AI Search.
    # https://python.langchain.com/v0.2/docs/integrations/vectorstores/azuresearch/
    store_options = {
        "azure_search_endpoint": getenv("AZURE_AI_SEARCH_ENDPOINT"),
        "azure_search_key": getenv("AZURE_AI_SEARCH_API_KEY"),
        "index_name": getenv("AZURE_AI_SEARCH_INDEX_NAME"),
        "embedding_function": embedder.embed_query,
        # Extra options handed to AzureSearch for its search client.
        "additional_search_client_options": {"retry_total": 4},
    }
    vector_store = AzureSearch(**store_options)

    # Upload the chunks and print whatever add_documents returns.
    added_ids = vector_store.add_documents(documents=chunks)
    pprint(added_ids)
41 changes: 41 additions & 0 deletions apps/6_call_azure_ai_search/data/test.txt

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions apps/6_call_azure_ai_search/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
def load_texts(file_path: str) -> list[str]:
    """Read a UTF-8 text file and return its lines.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The lines of the file in order, exactly as ``readlines`` yields
        them (line endings are kept).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which differs between systems and can garble or reject
    # non-ASCII (e.g. Japanese) text.
    with open(file_path, encoding="utf-8") as f:
        return f.readlines()
36 changes: 36 additions & 0 deletions apps/6_call_azure_ai_search/search_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from os import getenv
from pprint import pprint

from dotenv import load_dotenv
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_openai import AzureOpenAIEmbeddings

if __name__ == "__main__":
    # Run a search against Azure AI Search.
    load_dotenv()

    # Embedding client; every setting is read from the environment.
    embedding_options = {
        "api_key": getenv("AZURE_OPENAI_API_KEY"),
        "api_version": getenv("AZURE_OPENAI_API_VERSION"),
        "azure_endpoint": getenv("AZURE_OPENAI_ENDPOINT"),
        "model": getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
    }
    embedder = AzureOpenAIEmbeddings(**embedding_options)

    # Vector store bound to the index named in the environment.
    store_options = {
        "azure_search_endpoint": getenv("AZURE_AI_SEARCH_ENDPOINT"),
        "azure_search_key": getenv("AZURE_AI_SEARCH_API_KEY"),
        "index_name": getenv("AZURE_AI_SEARCH_INDEX_NAME"),
        "embedding_function": embedder.embed_query,
        # Extra options handed to AzureSearch for its search client.
        "additional_search_client_options": {"retry_total": 4},
    }
    store = AzureSearch(**store_options)

    # Issue a hybrid search requesting k=5 results and print them.
    hits = store.hybrid_search(query="吾輩は猫である。名前はまだない", k=5)
    pprint(hits)
Loading