Skip to content

Commit c0e21b6

Browse files
authored
Merge branch 'main' into py-identity-sdk-client
2 parents 58a46a8 + 3e5341c commit c0e21b6

12 files changed

+52
-30
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@ You can run this repo virtually by using GitHub Codespaces or VS Code Remote Con
5252
1. Create a new folder and switch to it in the terminal
5353
1. Run `azd auth login`
5454
1. Run `azd init -t azure-search-openai-demo`
55-
* For the target location, the regions that currently support the models used in this sample are **East US** or **South Central US**. For an up-to-date list of regions and models, check [here](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models)
5655
* Note that this command will initialize a git repository, so you do not need to clone this repository
5756

5857
#### Starting from scratch:
5958

6059
Execute the following command if you don't have any pre-existing Azure services and want to start from a fresh deployment.
6160

6261
1. Run `azd up` - This will provision Azure resources and deploy this sample to those resources, including building the search index based on the files found in the `./data` folder.
62+
* For the target location, the regions that currently support the models used in this sample are **East US**, **France Central**, **South Central US**, **UK South**, and **West Europe**. For an up-to-date list of regions and models, check [here](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models)
6363
1. After the application has been successfully deployed you will see a URL printed to the console. Click that URL to interact with the application in your browser.
6464

6565
It will look like the following:

app/backend/app.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import os
2+
import io
23
import mimetypes
34
import time
45
import logging
56
import openai
6-
from flask import Flask, request, jsonify
7+
from flask import Flask, request, jsonify, send_file, abort
78
from azure.identity import DefaultAzureCredential
89
from azure.search.documents import SearchClient
910
from approaches.retrievethenread import RetrieveThenReadApproach
@@ -76,14 +77,21 @@ def static_file(path):
7677
@app.route("/content/<path>")
7778
def content_file(path):
7879
blob = blob_container.get_blob_client(path).download_blob()
80+
if not blob.properties or not blob.properties.has_key("content_settings"):
81+
abort(404)
7982
mime_type = blob.properties["content_settings"]["content_type"]
8083
if mime_type == "application/octet-stream":
8184
mime_type = mimetypes.guess_type(path)[0] or "application/octet-stream"
82-
return blob.readall(), 200, {"Content-Type": mime_type, "Content-Disposition": f"inline; filename={path}"}
85+
blob_file = io.BytesIO()
86+
blob.readinto(blob_file)
87+
blob_file.seek(0)
88+
return send_file(blob_file, mimetype=mime_type, as_attachment=False, download_name=path)
8389

8490
@app.route("/ask", methods=["POST"])
8591
def ask():
8692
ensure_openai_token()
93+
if not request.json:
94+
return jsonify({"error": "request must be json"}), 400
8795
approach = request.json["approach"]
8896
try:
8997
impl = ask_approaches.get(approach)
@@ -98,6 +106,8 @@ def ask():
98106
@app.route("/chat", methods=["POST"])
99107
def chat():
100108
ensure_openai_token()
109+
if not request.json:
110+
return jsonify({"error": "request must be json"}), 400
101111
approach = request.json["approach"]
102112
try:
103113
impl = chat_approaches.get(approach)

app/backend/approaches/approach.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from typing import Any
2+
3+
14
class Approach:
2-
def run(self, q: str, use_summaries: bool) -> any:
5+
def run(self, q: str, overrides: dict[str, Any]) -> Any:
36
raise NotImplementedError

app/backend/approaches/chatreadretrieveread.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Any, Sequence
2+
13
import openai
24
from azure.search.documents import SearchClient
35
from azure.search.documents.models import QueryType
@@ -12,7 +14,7 @@ class ChatReadRetrieveReadApproach(Approach):
1214
Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
1315
Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
1416
For tabular information return it as an html table. Do not return markdown format.
15-
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brakets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].
17+
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].
1618
{follow_up_questions_prompt}
1719
{injected_prompt}
1820
Sources:
@@ -48,7 +50,7 @@ def __init__(self, search_client: SearchClient, chatgpt_deployment: str, gpt_dep
4850
self.sourcepage_field = sourcepage_field
4951
self.content_field = content_field
5052

51-
def run(self, history: list[dict], overrides: dict) -> any:
53+
def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
5254
use_semantic_captions = True if overrides.get("semantic_captions") else False
5355
top = overrides.get("top") or 3
5456
exclude_category = overrides.get("exclude_category") or None
@@ -105,10 +107,10 @@ def run(self, history: list[dict], overrides: dict) -> any:
105107

106108
return {"data_points": results, "answer": completion.choices[0].text, "thoughts": f"Searched for:<br>{q}<br><br>Prompt:<br>" + prompt.replace('\n', '<br>')}
107109

108-
def get_chat_history_as_text(self, history, include_last_turn=True, approx_max_tokens=1000) -> str:
110+
def get_chat_history_as_text(self, history: Sequence[dict[str, str]], include_last_turn: bool=True, approx_max_tokens: int=1000) -> str:
109111
history_text = ""
110112
for h in reversed(history if include_last_turn else history[:-1]):
111-
history_text = """<|im_start|>user""" +"\n" + h["user"] + "\n" + """<|im_end|>""" + "\n" + """<|im_start|>assistant""" + "\n" + (h.get("bot") + """<|im_end|>""" if h.get("bot") else "") + "\n" + history_text
113+
history_text = """<|im_start|>user""" + "\n" + h["user"] + "\n" + """<|im_end|>""" + "\n" + """<|im_start|>assistant""" + "\n" + (h.get("bot", "") + """<|im_end|>""" if h.get("bot") else "") + "\n" + history_text
112114
if len(history_text) > approx_max_tokens*4:
113115
break
114-
return history_text
116+
return history_text

app/backend/approaches/readdecomposeask.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from langchain.agents.react.base import ReActDocstoreAgent
1111
from langchainadapters import HtmlCallbackHandler
1212
from text import nonewlines
13-
from typing import List
13+
from typing import Any, List, Optional
1414

1515
class ReadDecomposeAsk(Approach):
1616
def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepage_field: str, content_field: str):
@@ -19,7 +19,7 @@ def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepa
1919
self.sourcepage_field = sourcepage_field
2020
self.content_field = content_field
2121

22-
def search(self, q: str, overrides: dict) -> str:
22+
def search(self, q: str, overrides: dict[str, Any]) -> str:
2323
use_semantic_captions = True if overrides.get("semantic_captions") else False
2424
top = overrides.get("top") or 3
2525
exclude_category = overrides.get("exclude_category") or None
@@ -42,7 +42,7 @@ def search(self, q: str, overrides: dict) -> str:
4242
self.results = [doc[self.sourcepage_field] + ":" + nonewlines(doc[self.content_field][:500]) for doc in r]
4343
return "\n".join(self.results)
4444

45-
def lookup(self, q: str) -> str:
45+
def lookup(self, q: str) -> Optional[str]:
4646
r = self.search_client.search(q,
4747
top = 1,
4848
include_total_count=True,
@@ -58,9 +58,9 @@ def lookup(self, q: str) -> str:
5858
return answers[0].text
5959
if r.get_count() > 0:
6060
return "\n".join(d['content'] for d in r)
61-
return None
61+
return None
6262

63-
def run(self, q: str, overrides: dict) -> any:
63+
def run(self, q: str, overrides: dict[str, Any]) -> Any:
6464
# Not great to keep this as instance state, won't work with interleaving (e.g. if using async), but keeps the example simple
6565
self.results = None
6666

app/backend/approaches/readretrieveread.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
from langchain.callbacks.manager import CallbackManager, Callbacks
77
from langchain.chains import LLMChain
88
from langchain.agents import Tool, ZeroShotAgent, AgentExecutor
9-
from langchain.llms.openai import AzureOpenAI
109
from langchainadapters import HtmlCallbackHandler
1110
from text import nonewlines
1211
from lookuptool import CsvLookupTool
12+
from typing import Any
1313

1414
# Attempt to answer questions by iteratively evaluating the question to see what information is missing, and once all information
1515
# is present then formulate an answer. Each iteration consists of two parts: first use GPT to see if we need more information,
@@ -45,7 +45,7 @@ def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepa
4545
self.sourcepage_field = sourcepage_field
4646
self.content_field = content_field
4747

48-
def retrieve(self, q: str, overrides: dict) -> any:
48+
def retrieve(self, q: str, overrides: dict[str, Any]) -> Any:
4949
use_semantic_captions = True if overrides.get("semantic_captions") else False
5050
top = overrides.get("top") or 3
5151
exclude_category = overrides.get("exclude_category") or None
@@ -69,7 +69,7 @@ def retrieve(self, q: str, overrides: dict) -> any:
6969
content = "\n".join(self.results)
7070
return content
7171

72-
def run(self, q: str, overrides: dict) -> any:
72+
def run(self, q: str, overrides: dict[str, Any]) -> Any:
7373
# Not great to keep this as instance state, won't work with interleaving (e.g. if using async), but keeps the example simple
7474
self.results = None
7575

@@ -115,5 +115,5 @@ def __init__(self, employee_name: str, callbacks: Callbacks = None):
115115
self.func = self.employee_info
116116
self.employee_name = employee_name
117117

118-
def employee_info(self, unused: str) -> str:
119-
return self.lookup(self.employee_name)
118+
def employee_info(self, name: str) -> str:
119+
return self.lookup(name)

app/backend/approaches/retrievethenread.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from azure.search.documents import SearchClient
44
from azure.search.documents.models import QueryType
55
from text import nonewlines
6+
from typing import Any
67

78
# Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
89
# top documents from search, then constructs a prompt with them, and then uses OpenAI to generate a completion
@@ -45,7 +46,7 @@ def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepa
4546
self.sourcepage_field = sourcepage_field
4647
self.content_field = content_field
4748

48-
def run(self, q: str, overrides: dict) -> any:
49+
def run(self, q: str, overrides: dict[str, Any]) -> Any:
4950
use_semantic_captions = True if overrides.get("semantic_captions") else False
5051
top = overrides.get("top") or 3
5152
exclude_category = overrides.get("exclude_category") or None

app/backend/langchainadapters.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
from typing import Any, Dict, List, Optional
1+
from typing import Any, Dict, List, Optional, Union
22
from langchain.callbacks.base import BaseCallbackHandler
33
from langchain.schema import AgentAction, AgentFinish, LLMResult
44

5-
def ch(text: str) -> str:
5+
def ch(text: Union[str, object]) -> str:
66
s = text if isinstance(text, str) else str(text)
77
return s.replace("<", "&lt;").replace(">", "&gt;").replace("\r", "").replace("\n", "<br>")
88

app/backend/lookuptool.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
from os import path
21
import csv
2+
from pathlib import Path
33
from langchain.agents import Tool
44
from langchain.callbacks.manager import Callbacks
5-
from typing import Optional
5+
from typing import Optional, Union
66

77
class CsvLookupTool(Tool):
88
data: dict[str, str] = {}
99

10-
def __init__(self, filename: path, key_field: str, name: str = "lookup",
10+
def __init__(self, filename: Union[str, Path], key_field: str, name: str = "lookup",
1111
description: str = "useful to look up details given an input key as opposite to searching data with an unstructured question",
1212
callbacks: Callbacks = None):
1313
super().__init__(name, self.lookup, description, callbacks=callbacks)

app/backend/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
azure-identity==1.13.0
2-
Flask==2.2.2
2+
Flask==2.2.5
33
langchain==0.0.187
44
openai==0.26.4
55
azure-search-documents==11.4.0b3

0 commit comments

Comments
 (0)