Skip to content

Commit cc4b935

Browse files
committed
Updated streamlit embedding demo
1 parent 39c233e commit cc4b935

File tree

9 files changed

+422
-188
lines changed

9 files changed

+422
-188
lines changed

notebooks/GenAI/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
__pycache__
2+
.venv
3+
.env
4+
microsoft-earnings_embeddings.csv
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import streamlit as st
2+
from styling import global_page_style
3+
4+
def main():
    """Render the landing page of the Azure OpenAI RAG Demo Suite.

    Emits, in order: a centered Microsoft logo, the page title, a welcome
    paragraph, and one descriptive section per demo, with horizontal rules
    between sections. Takes no arguments and returns None; everything is
    written through Streamlit calls.
    """
    # NOTE: the page-configuration call is disabled in this version.
    # st.set_page_config(page_title="Azure OpenAI RAG Demo Suite", layout="wide")

    # Centered logo above the title; raw HTML requires unsafe_allow_html.
    logo_html = (
        '<div style="text-align: center;">'
        '<img src="https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg" width="60">'
        '</div>'
    )
    st.markdown(logo_html, unsafe_allow_html=True)

    # Title and overview.
    st.title("Azure OpenAI RAG Demo Suite")
    st.markdown("### Demo Overviews")
    welcome_text = """
    Welcome to the Azure OpenAI RAG Demo Suite. On the left side-panel, you will find various demonstrations that showcase the capabilities of Azure OpenAI with a Streamlit frontend. Each demonstration is described in detail below, highlighting their unique features and functionalities.
    """
    st.write(welcome_text)

    st.markdown("---")

    # Section: "Chat with Your Data" demo.
    st.markdown("### Chat with Your Data using Azure OpenAI API and AI Search Index (AI Search Query)")
    chat_summary = """
    This demo allows users to interact with data stored in their Azure AI Search Index using a combination of semantic and vector search methods.
    """
    chat_features = """
    - **Semantic Search**: Understands the meaning and context of your queries to deliver more relevant results.
    - **Vector Search**: Utilizes numerical representations of text to find similar content based on cosine similarity.
    """
    # Prerequisite reminder: the search index has to exist before the demo is used.
    chat_prerequisite = """
    **Note**: Users must have created the Azure AI search index already as shown here: [Upload your own data and query over it](https://github.com/STRIDES/NIHCloudLabAzure/blob/main/notebooks/GenAI/Azure_Open_AI_README.md)
    """
    for paragraph in (chat_summary, chat_features, chat_prerequisite):
        st.write(paragraph)

    st.markdown("---")

    # Section: "Generate & Search with Azure OpenAI Embeddings" demo.
    st.markdown("### Generate & Search with Azure OpenAI Embeddings (AOAI Embeddings)")
    embeddings_summary = """
    This demo enables users to generate embeddings from a pre-chunked CSV file and perform searches over the content using vector search.
    """
    embeddings_features = """
    - **Vectorize**: Creates embeddings based on the "microsoft-earnings.csv" file provided in this directory. The embeddings are generated from the "text" column. The CSV file is pre-chunked, meaning the text has already been split and prepared for embedding generation. A new CSV file will be created to store all generated embeddings, forming your vector store.
    - **Retrieve**: Generates embeddings based on user queries. The query embedding is then used to search for the most similar document within the vector store using cosine similarity.
    """
    example_questions = """
    Example questions a user can ask about the microsoft-earnings.csv:
    - What was said about the budget?
    - How many people utilize GitHub to build software?
    - How many points did Microsoft Cloud gross margin percentage increase by?
    - What are the expectations for the Q2 cash flow?
    """
    for paragraph in (embeddings_summary, embeddings_features, example_questions):
        st.write(paragraph)
57+
58+
59+
# Script entry point: apply the shared page styling, then draw the page.
if __name__ == "__main__":
    global_page_style()
    main()

notebooks/GenAI/embedding_demos/acs_embeddings.py

Lines changed: 0 additions & 79 deletions
This file was deleted.

notebooks/GenAI/embedding_demos/aoai_embeddings.py

Lines changed: 0 additions & 102 deletions
This file was deleted.
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from openai import AzureOpenAI
2+
import os
3+
import streamlit as st
4+
from dotenv import load_dotenv
5+
from styling import global_page_style
6+
7+
# Load variables from a .env file (if any) into the process environment.
load_dotenv()

# Azure OpenAI settings: endpoint, key, API version, and the names of the
# embedding-model and GPT deployments.
# NOTE: each value is None when the corresponding variable is not set.
azure_endpoint: str = os.environ.get('AZURE_OPENAI_BASE')
azure_openai_api_key: str = os.environ.get('AZURE_OPENAI_KEY')
azure_openai_api_version: str = os.environ.get('AZURE_OPENAI_VERSION')
azure_ada_deployment: str = os.environ.get('AZURE_EMBEDDINGS_DEPLOYMENT')
azure_gpt_deployment: str = os.environ.get('AZURE_GPT_DEPLOYMENT')

# Azure AI Search settings: service endpoint and admin key.
search_endpoint: str = os.environ.get('AZURE_SEARCH_ENDPOINT')
search_key: str = os.environ.get('AZURE_SEARCH_ADMIN_KEY')
20+
21+
def _azure_search_data_source(search_index):
    """Return the ``data_sources`` entry that grounds a completion on *search_index*.

    Reads the module-level Azure AI Search endpoint and key and the
    embeddings deployment name.
    """
    return {
        "type": "azure_search",
        "parameters": {
            # Passed through as-is rather than via an f-string, so an unset
            # variable is not silently sent as the literal text "None".
            "endpoint": search_endpoint,
            "index_name": search_index,
            "semantic_configuration": "default",
            "query_type": "vector_simple_hybrid",
            "fields_mapping": {},
            "in_scope": True,
            "role_information": "You are an AI assistant that helps people find information.",
            "filter": None,
            "strictness": 3,
            "top_n_documents": 5,
            "authentication": {
                "type": "api_key",
                "key": search_key,
            },
            "embedding_dependency": {
                "type": "deployment_name",
                "deployment_name": azure_ada_deployment,
            },
        },
    }


def chat_on_your_data(query, search_index, messages):
    """Answer *query* against an Azure AI Search index and render the exchange.

    The user's query is echoed in a chat bubble, a chat completion is
    requested from the configured Azure OpenAI GPT deployment with the search
    index attached as a data source, and the assistant's reply is rendered in
    a second chat bubble.

    Args:
        query: The user's question text.
        search_index: Name of the Azure AI Search index to query.
        messages: List of ``{"role": ..., "content": ...}`` dicts holding the
            chat history. It is mutated in place: the user query and the
            assistant reply are appended.

    Returns:
        None.
    """
    messages.append({"role": "user", "content": query})
    with st.chat_message("user"):
        st.markdown(query)

    with st.spinner('Processing...'):
        client = AzureOpenAI(
            azure_endpoint=azure_endpoint,
            api_key=azure_openai_api_key,
            api_version=azure_openai_api_version,
        )
        # NOTE(review): only the current query is sent to the model; earlier
        # turns stored in `messages` are not forwarded. Confirm this
        # single-turn behaviour is intended.
        completion = client.chat.completions.create(
            model=azure_gpt_deployment,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an AI assistant that helps people find information. "
                        "Ensure the Markdown responses are correctly formatted before responding."
                    ),
                },
                {"role": "user", "content": query},
            ],
            max_tokens=800,
            temperature=0.7,
            top_p=0.95,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None,
            stream=False,
            extra_body={"data_sources": [_azure_search_data_source(search_index)]},
        )

    # The message content is optional in the response model; fall back to an
    # empty string so the history never stores None and rendering cannot fail.
    # (The former debug print of the whole response was removed: it dumped
    # the full service response to the server's stdout.)
    ai_response = completion.choices[0].message.content or ""
    messages.append({"role": "assistant", "content": ai_response})
    with st.chat_message("assistant"):
        st.markdown(ai_response)
77+
78+
def main():
    """Render the "Azure OpenAI & AI Search" demo page.

    Shows the logo, title and introduction, asks for the name of an Azure AI
    Search index and, once one is entered, replays the stored chat history
    and forwards any new query to ``chat_on_your_data``. Takes no arguments
    and returns None.
    """
    # Centered logo above the title; raw HTML requires unsafe_allow_html.
    logo_html = (
        '<div style="text-align: center;">'
        '<img src="https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg" width="60">'
        '</div>'
    )
    st.markdown(logo_html, unsafe_allow_html=True)
    st.title("Demo - Azure OpenAI & AI Search")

    intro = (
        'This demo showcases an innovative way for users to engage with data housed in their Azure AI Search Index by leveraging both '
        'semantic and vector search techniques. Semantic search enhances the querying process by comprehending the meaning and context of '
        'user queries, thereby providing more pertinent results. Vector search, on the other hand, employs numerical representations of '
        'text to identify similar content using cosine similarity. ***For users to effectively utilize this demo, it is essential that they '
        'have previously created their Azure AI Search Index, following the necessary steps to upload and query their data as outlined [here](https://github.com/STRIDES/NIHCloudLabAzure/blob/main/notebooks/GenAI/Azure_Open_AI_README.md).***'
    )
    st.write(intro)

    # The chat history lives in session state under the "messages" key.
    if 'messages' not in st.session_state:
        st.session_state.messages = []
    history = st.session_state.messages

    index_name = st.text_input(label="Azure AI Search index name:", value="")
    st.write('-' * 50)

    # Nothing more to show until an index name has been entered.
    if not index_name:
        return

    query = st.chat_input('Input search query here...')

    # Replay earlier turns before handling the new query.
    for past_message in history:
        with st.chat_message(past_message["role"]):
            st.markdown(past_message['content'])

    if query:
        chat_on_your_data(query, index_name, history)
102+
103+
104+
# Script entry point: apply the shared page styling, then draw the page.
if __name__ == "__main__":
    global_page_style()
    main()

0 commit comments

Comments
 (0)