Skip to content

Commit e2cb539

Browse files
committed
Replaced RAG query with Matt's updated query
1 parent f7c0864 commit e2cb539

File tree

1 file changed

+43
-95
lines changed

1 file changed

+43
-95
lines changed

articles/search/search-get-started-rag.md

Lines changed: 43 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -186,49 +186,28 @@ This section uses Visual Studio Code and Python to call the chat completion APIs
186186
sources_to_include=5
187187
```
188188

189-
1. Set up clients, a search functions prompts, and a chat. The function retrieves selected fields from the search index.
190-
191-
```python
192-
# Set up the query for generating responses
193-
from azure.core.credentials_async import AsyncTokenCredential
194-
from azure.identity.aio import get_bearer_token_provider
195-
from azure.search.documents.aio import SearchClient
196-
from openai import AsyncAzureOpenAI
197-
from enum import Enum
198-
from typing import List, Optional
199-
200-
def create_openai_client(credential: AsyncTokenCredential) -> AsyncAzureOpenAI:
201-
token_provider = get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
202-
return AsyncAzureOpenAI(
203-
api_version="2024-04-01-preview",
204-
azure_endpoint=AZURE_OPENAI_ACCOUNT,
205-
azure_ad_token_provider=token_provider
206-
)
207-
208-
def create_search_client(credential: AsyncTokenCredential) -> SearchClient:
209-
return SearchClient(
210-
endpoint=AZURE_SEARCH_SERVICE,
211-
index_name="hotels-sample-index",
212-
credential=credential
213-
)
214-
215-
# This quickstart is only using text at the moment
216-
class SearchType(Enum):
217-
TEXT = "text"
218-
VECTOR = "vector"
219-
HYBRID = "hybrid"
189+
1. Set up clients, the prompt, query, and response.
190+
191+
```python
192+
# Set up the query for generating responses
193+
from azure.identity import DefaultAzureCredential
194+
from azure.identity import get_bearer_token_provider
195+
from azure.search.documents import SearchClient
196+
from openai import AzureOpenAI
220197

221-
# This function retrieves the selected fields from the search index
222-
async def get_sources(search_client: SearchClient, query: str, search_type: SearchType, use_semantic_reranker: bool = True, sources_to_include: int = 5) -> List[str]:
223-
search_type == SearchType.TEXT,
224-
response = await search_client.search(
225-
search_text=query,
226-
query_type="semantic" if use_semantic_reranker else "simple",
227-
top=sources_to_include,
228-
select="Description,HotelName,Tags"
229-
)
198+
credential = DefaultAzureCredential()
199+
token_provider = get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")
200+
openai_client = AzureOpenAI(
201+
api_version="2024-06-01",
202+
azure_endpoint=AZURE_OPENAI_ACCOUNT,
203+
azure_ad_token_provider=token_provider
204+
)
230205

231-
return [ document async for document in response ]
206+
search_client = SearchClient(
207+
endpoint=AZURE_SEARCH_SERVICE,
208+
index_name="hotels-sample-index",
209+
credential=credential
210+
)
232211

233212
# This prompt provides instructions to the model
234213
GROUNDED_PROMPT="""
@@ -240,63 +219,30 @@ This section uses Visual Studio Code and Python to call the chat completion APIs
240219
Query: {query}
241220
Sources:\n{sources}
242221
"""
243-
244-
# This class instantiates the chat
245-
class ChatThread:
246-
def __init__(self):
247-
self.messages = []
248-
self.search_results = []
249-
250-
def append_message(self, role: str, message: str):
251-
self.messages.append({
252-
"role": role,
253-
"content": message
254-
})
255222

256-
async def append_grounded_message(self, search_client: SearchClient, query: str, search_type: SearchType, use_semantic_reranker: bool = True, sources_to_include: int = 5):
257-
sources = await get_sources(search_client, query, search_type, use_semantic_reranker, sources_to_include)
258-
sources_formatted = "\n".join([f'{document["HotelName"]}:{document["Description"]}:{document["Tags"]}' for document in sources])
259-
self.append_message(role="user", message=GROUNDED_PROMPT.format(query=query, sources=sources_formatted))
260-
self.search_results.append(
261-
{
262-
"message_index": len(self.messages) - 1,
263-
"query": query,
264-
"sources": sources
265-
}
266-
)
223+
# Query is the question being asked. It's sent to the search engine and the LLM.
224+
query="Can you recommend a few hotels near the ocean with beach access and good views"
267225

268-
async def get_openai_response(self, openai_client: AsyncAzureOpenAI, model: str):
269-
response = await openai_client.chat.completions.create(
270-
messages=self.messages,
271-
model=model
272-
)
273-
self.append_message(role="assistant", message=response.choices[0].message.content)
226+
# Set up the search results and the chat thread.
227+
# Retrieve the selected fields from the search index related to the question.
228+
search_results = search_client.search(
229+
search_text=query,
230+
top=5,
231+
select="Description,HotelName,Tags"
232+
)
233+
sources_formatted = "\n".join([f'{document["HotelName"]}:{document["Description"]}:{document["Tags"]}' for document in search_results])
274234

275-
def get_last_message(self) -> Optional[object]:
276-
return self.messages[-1] if len(self.messages) > 0 else None
277-
278-
def get_last_message_sources(self) -> Optional[List[object]]:
279-
return self.search_results[-1]["sources"] if len(self.search_results) > 0 else None
280-
```
281-
282-
1. Invoke the chat and call the search function, passing in a query string to search for.
283-
284-
```python
285-
import azure.identity.aio
286-
287-
chat_thread = ChatThread()
288-
chat_deployment = AZURE_DEPLOYMENT_MODEL
289-
290-
async with azure.identity.aio.DefaultAzureCredential() as credential, create_search_client(credential) as search_client, create_openai_client(credential) as openai_client:
291-
await chat_thread.append_grounded_message(
292-
search_client=search_client,
293-
query="Can you recommend a few hotels near the ocean with beach access and good views",
294-
search_type=SearchType(search_type),
295-
use_semantic_reranker=use_semantic_reranker,
296-
sources_to_include=sources_to_include)
297-
await chat_thread.get_openai_response(openai_client=openai_client, model=chat_deployment)
235+
response = openai_client.chat.completions.create(
236+
messages=[
237+
{
238+
"role": "user",
239+
"content": GROUNDED_PROMPT.format(query=query, sources=sources_formatted)
240+
}
241+
],
242+
model=AZURE_DEPLOYMENT_MODEL
243+
)
298244

299-
print(chat_thread.get_last_message()["content"])
245+
print(response.choices[0].message.content)
300246
```
301247

302248
Output is from Azure OpenAI, and it consists of recommendations for several hotels. Here's an example of what the output might look like:
@@ -312,10 +258,12 @@ This section uses Visual Studio Code and Python to call the chat completion APIs
312258

313259
If you get an authorization error message, wait a few minutes and try again. It can take several minutes for role assignments to become operational.
314260

315-
To experiment further, change the query and rerun the last step to better understand how the model works with your data.
261+
To experiment further, change the query and rerun the last step to better understand how the model works with the grounding data.
316262

317263
You can also modify the prompt to change the tone or structure of the output.
318264

265+
You might also try the query without semantic ranking by setting `use_semantic_reranker=False` in the query parameters step. Semantic ranking can noticeably improve the relevance of query results and the ability of the LLM to return useful information. Experimentation can help you decide whether it makes a difference for your content.
266+
319267
## Clean up
320268

321269
When you're working in your own subscription, it's a good idea at the end of a project to identify whether you still need the resources you created. Resources left running can cost you money. You can delete resources individually or delete the resource group to delete the entire set of resources.

0 commit comments

Comments
 (0)