ks6088ts-labs
diff --git a/‎.env.template‎
Lines changed: 6 additions & 0 deletions b/‎.env.template‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎apps/6_call_azure_ai_search/README.md‎
Lines changed: 41 additions & 0 deletions b/‎apps/6_call_azure_ai_search/README.md‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎apps/6_call_azure_ai_search/create_index.py‎
Lines changed: 57 additions & 0 deletions b/‎apps/6_call_azure_ai_search/create_index.py‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎apps/6_call_azure_ai_search/data/test.txt‎
Lines changed: 41 additions & 0 deletions b/‎apps/6_call_azure_ai_search/data/test.txt‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎apps/6_call_azure_ai_search/datasets.py‎
Lines changed: 3 additions & 0 deletions b/‎apps/6_call_azure_ai_search/datasets.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎apps/6_call_azure_ai_search/search_index.py‎
Lines changed: 36 additions & 0 deletions b/‎apps/6_call_azure_ai_search/search_index.py‎
Lines changed: 36 additions & 0 deletions
@@ -4,8 +4,14 @@ AZURE_OPENAI_API_KEY="<YOUR_API_KEY>"
 AZURE_OPENAI_API_VERSION="2024-06-01"
 AZURE_OPENAI_GPT_MODEL="gpt-4o"
 AZURE_OPENAI_STT_MODEL="whisper"
+AZURE_OPENAI_EMBEDDING_MODEL="text-embedding-3-large"
 
 # Azure Cosmos DB
 AZURE_COSMOS_DB_CONNECTION_STRING="AccountEndpoint=https://<YOUR_COSMOSDB_NAME>.documents.azure.com:443/;AccountKey=<ACCOUNT_KEY>;"
 AZURE_COSMOS_DB_DATABASE_NAME="workshop"
 AZURE_COSMOS_DB_CONTAINER_NAME="chat"
+
+# Azure AI Search
+AZURE_AI_SEARCH_ENDPOINT="https://<YOUR_AZURE_SEARCH_NAME>.search.windows.net/"
+AZURE_AI_SEARCH_API_KEY="<YOUR_API_KEY>"
+AZURE_AI_SEARCH_INDEX_NAME="chat"
@@ -0,0 +1,41 @@
+# Azure AI Search を Python から呼び出す
+
+Azure AI Search を Python から呼び出す方法を説明します。
+
+## 前提条件
+
+- Python 3.11+ がインストールされていること
+- Azure AI Search が利用できること
+- Azure AI Search の API キーが取得できていること
+
+## 手順
+
+1. Azure AI Search の API キーを取得する
+1. [.env.template](../../.env.template) をコピーして `.env` ファイルを作成する
+1. `.env` ファイルに Azure AI Search のエンドポイント・API キー・インデックス名と、埋め込みに使用する Azure OpenAI の設定を記入する
+
+```shell
+# 仮想環境を作成する
+python -m venv .venv
+
+# 仮想環境を有効化する
+source .venv/bin/activate
+
+# ライブラリをインストールする
+pip install -r requirements.txt
+```
+
+### 実行例
+
+```shell
+# Azure AI Search にインデックスを作成して、ドキュメントを追加する
+python apps/6_call_azure_ai_search/create_index.py
+
+# Azure AI Search にクエリを発行して、検索結果を取得する
+python apps/6_call_azure_ai_search/search_index.py
+```
+
+## 参考資料
+
+- [How to recursively split text by characters](https://python.langchain.com/v0.2/docs/how_to/recursive_text_splitter/)
+- [青空文庫 > 吾輩は猫である](https://www.aozora.gr.jp/cards/000148/files/789_14547.html)
@@ -0,0 +1,57 @@
+from os import getenv
+from pprint import pprint
+
+from datasets import load_texts
+from dotenv import load_dotenv
+from langchain_community.vectorstores.azuresearch import AzureSearch
+from langchain_openai import AzureOpenAIEmbeddings
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+if __name__ == "__main__":
+    """
+    テキストを Azure OpenAI Service で埋め込み化し、Azure AI Search にインデックス化します。
+    """
+    load_dotenv()
+
+    # ドキュメントを取得
+    texts = load_texts(
+        file_path="./apps/6_call_azure_ai_search/data/test.txt",
+    )
+
+    # テキストを分割
+    # https://python.langchain.com/v0.2/docs/how_to/recursive_text_splitter/
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=400,
+        chunk_overlap=20,
+        length_function=len,
+        is_separator_regex=False,
+    )
+
+    documents = text_splitter.create_documents(
+        texts=texts,
+    )
+    pprint(documents)
+
+    embeddings = AzureOpenAIEmbeddings(
+        api_key=getenv("AZURE_OPENAI_API_KEY"),
+        api_version=getenv("AZURE_OPENAI_API_VERSION"),
+        azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
+        model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
+    )
+
+    # Azure AI Search でのインデックス化
+    # https://python.langchain.com/v0.2/docs/integrations/vectorstores/azuresearch/
+    # create index
+    search = AzureSearch(
+        azure_search_endpoint=getenv("AZURE_AI_SEARCH_ENDPOINT"),
+        azure_search_key=getenv("AZURE_AI_SEARCH_API_KEY"),
+        index_name=getenv("AZURE_AI_SEARCH_INDEX_NAME"),
+        embedding_function=embeddings.embed_query,
+        additional_search_client_options={
+            "retry_total": 4,
+        },
+    )
+
+    # add documents
+    docs_ids = search.add_documents(documents=documents)
+    pprint(docs_ids)
@@ -0,0 +1,3 @@
+def load_texts(file_path: str) -> list:
+    with open(file_path) as f:
+        return f.readlines()
@@ -0,0 +1,36 @@
+from os import getenv
+from pprint import pprint
+
+from dotenv import load_dotenv
+from langchain_community.vectorstores.azuresearch import AzureSearch
+from langchain_openai import AzureOpenAIEmbeddings
+
+if __name__ == "__main__":
+    """
+    Azure AI Search で検索を行う
+    """
+    load_dotenv()
+
+    embeddings = AzureOpenAIEmbeddings(
+        api_key=getenv("AZURE_OPENAI_API_KEY"),
+        api_version=getenv("AZURE_OPENAI_API_VERSION"),
+        azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
+        model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
+    )
+
+    vector_store = AzureSearch(
+        azure_search_endpoint=getenv("AZURE_AI_SEARCH_ENDPOINT"),
+        azure_search_key=getenv("AZURE_AI_SEARCH_API_KEY"),
+        index_name=getenv("AZURE_AI_SEARCH_INDEX_NAME"),
+        embedding_function=embeddings.embed_query,
+        additional_search_client_options={
+            "retry_total": 4,
+        },
+    )
+
+    # search for documents
+    results = vector_store.hybrid_search(
+        query="吾輩は猫である。名前はまだない",
+        k=5,
+    )
+    pprint(results)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+def load_texts(file_path: str) -> list:`
	`2`	`+ with open(file_path) as f:`
	`3`	`+ return f.readlines()`