Merge pull request #120 from HeidiSteen/main

HeidiSteen · web-flow · commit 19460a42f289 · 2024-09-12T16:20:25.000-07:00
Update RAG tutorial to use latest stable Python package
diff --git a/Tutorial-RAG/Tutorial-rag-requirements.txt b/Tutorial-RAG/Tutorial-rag-requirements.txt
@@ -1,4 +1,5 @@
 python-dotenv
+azure-core
 azure-search-documents==11.5.1
 azure-storage-blob
 azure-identity
diff --git a/Tutorial-RAG/Tutorial-rag.ipynb b/Tutorial-RAG/Tutorial-rag.ipynb
@@ -31,6 +31,8 @@
     "  - Deploy a chat model (GPT-3.5-Turbo, GPT-4, or equivalent LLM).\n",
     "  - Deploy an embedding model (text-embedding-ada-002, text-embedding-3-large, text-embedding-3-small)\n",
     "\n",
+    "- [Azure AI Services multiservice account](https://learn.microsoft.com/azure/ai-services/multi-service-resource), in the same region as Azure AI Search. This resource is used for the Entity Recognition skill that detects locations in your content.\n",
+    "\n",
     "- [Azure AI Search](https://learn.microsoft.com/azure/search/search-create-service-portal)\n",
     "\n",
     "  - Basic tier or higher is recommended.\n",
@@ -83,18 +85,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from azure.identity import DefaultAzureCredential\n",
-    "from azure.identity import get_bearer_token_provider\n",
-    "from azure.search.documents.indexes import SearchIndexClient\n",
     "from azure.core.credentials import AzureKeyCredential\n",
+    "from azure.search.documents.indexes import SearchIndexClient\n",
     "from azure.search.documents.indexes.models import (\n",
     "    SearchField,\n",
     "    SearchFieldDataType,\n",
     "    VectorSearch,\n",
     "    HnswAlgorithmConfiguration,\n",
     "    VectorSearchProfile,\n",
     "    AzureOpenAIVectorizer,\n",
-    "    AzureOpenAIParameters,\n",
+    "    AzureOpenAIVectorizerParameters,\n",
     "    SearchIndex\n",
     ")\n",
     "\n",
@@ -121,20 +121,21 @@
     "        VectorSearchProfile(  \n",
     "            name=\"myHnswProfile\",  \n",
     "            algorithm_configuration_name=\"myHnsw\",  \n",
-    "            vectorizer=\"myOpenAI\",  \n",
+    "            vectorizer_name=\"myOpenAI\",  \n",
     "        )\n",
     "    ],  \n",
     "    vectorizers=[  \n",
     "        AzureOpenAIVectorizer(  \n",
-    "            name=\"myOpenAI\",  \n",
+    "            vectorizer_name=\"myOpenAI\",  \n",
     "            kind=\"azureOpenAI\",  \n",
-    "            azure_open_ai_parameters=AzureOpenAIParameters(  \n",
-    "                resource_uri=AZURE_OPENAI_ACCOUNT,  \n",
-    "                deployment_id=\"text-embedding-ada-002\",\n",
-    "                model_name=\"text-embedding-ada-002\"\n",
+    "            parameters=AzureOpenAIVectorizerParameters(  \n",
+    "                resource_url=AZURE_OPENAI_ACCOUNT,  \n",
+    "                deployment_name=\"text-embedding-ada-002\",\n",
+    "                model_name=\"text-embedding-ada-002\",\n",
+    "                api_key=AZURE_OPENAI_KEY\n",
     "            ),\n",
     "        ),  \n",
-    "    ],  \n",
+    "    ], \n",
     ")  \n",
     "  \n",
     "# Create the search index\n",
@@ -258,7 +259,8 @@
     "            source_context=\"/document/pages/*\",  \n",
     "            mappings=[  \n",
     "                InputFieldMappingEntry(name=\"chunk\", source=\"/document/pages/*\"),  \n",
-    "                InputFieldMappingEntry(name=\"text_vector\", source=\"/document/pages/*/text_vector\"),  \n",
+    "                InputFieldMappingEntry(name=\"text_vector\", source=\"/document/pages/*/text_vector\"),\n",
+    "                InputFieldMappingEntry(name=\"locations\", source=\"/document/pages/*/locations\"),  \n",
     "                InputFieldMappingEntry(name=\"title\", source=\"/document/metadata_storage_name\"),  \n",
     "            ],  \n",
     "        ),  \n",
@@ -319,11 +321,10 @@
     "    parameters=indexer_parameters\n",
     ")  \n",
     "\n",
+    "# Create and run the indexer  \n",
     "indexer_client = SearchIndexerClient(endpoint=AZURE_SEARCH_SERVICE, credential=AZURE_SEARCH_CREDENTIAL)  \n",
     "indexer_result = indexer_client.create_or_update_indexer(indexer)  \n",
-    "  \n",
-    "# Run the indexer  \n",
-    "indexer_client.run_indexer(indexer_name)  \n",
+    "\n",
     "print(f' {indexer_name} is created and running. Give the indexer a few minutes before running a query.')  "
    ]
   },
@@ -343,7 +344,7 @@
     "from azure.search.documents import SearchClient\n",
     "from azure.search.documents.models import VectorizableTextQuery\n",
     "\n",
-    "# Hybrid Search\n",
+    "# Vector search using text-to-vector conversion of the query string\n",
     "query = \"how much of earth is covered by water\"  \n",
     "\n",
     "search_client = SearchClient(endpoint=AZURE_SEARCH_SERVICE, credential=AZURE_SEARCH_CREDENTIAL, index_name=index_name)\n",
@@ -358,8 +359,9 @@
     "  \n",
     "for result in results:  \n",
     "    print(f\"Score: {result['@search.score']}\")\n",
-    "    print(f\"Title: {result['title']}\")  \n",
-    "    print(f\"Content: {result['chunk']}\")  "
+    "    print(f\"Title: {result['title']}\")\n",
+    "    print(f\"Locations: {result['locations']}\")\n",
+    "    print(f\"Content: {result['chunk']}\")"
    ]
   },
   {
@@ -397,7 +399,7 @@
     "\n",
     "# Provide instructions to the model\n",
     "GROUNDED_PROMPT=\"\"\"\n",
-    "You are an AI assistant that helps users find the information their looking for.\n",
+    "You are an AI assistant that helps users learn from the information found in the source material.\n",
     "Answer the query using only the sources provided below.\n",
     "Use bullets if the answer has multiple points.\n",
     "If the answer is longer than 3 sentences, provide a summary.\n",

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`python-dotenv`
	`2`	`+azure-core`
`2`	`3`	`azure-search-documents==11.5.1`
`3`	`4`	`azure-storage-blob`
`4`	`5`	`azure-identity`