Skip to content

Commit 3601589

Browse files
srbalakr and pamelafox authored
Integrate GPT4-vision support (#1056)
* Squash commit

# This is a combination of 3 commits.
add new files
s
remove

* s

* s

* Add one more conditional to Bicep, and fix the mocks to use vector_queries

---------

Co-authored-by: Pamela Fox <[email protected]>
1 parent b382d94 commit 3601589

File tree

121 files changed

+6165
-3087
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

121 files changed

+6165
-3087
lines changed

.github/workflows/python-test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ jobs:
4848
run: black . --check --verbose
4949
- name: Run Python tests
5050
if: runner.os != 'Windows'
51-
run: python3 -m pytest -s -vv --cov --cov-fail-under=87
51+
run: python3 -m pytest -s -vv --cov --cov-fail-under=86
5252
- name: Run E2E tests with Playwright
5353
id: e2e
5454
if: runner.os != 'Windows'

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,4 @@ npm-debug.log*
146146
node_modules
147147
static/
148148

149-
data/*.md5
149+
data/**/*.md5

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,10 @@ either you or they can follow these steps:
241241

242242
## Enabling optional features
243243

244+
### Enabling GPT-4 Turbo with Vision
245+
246+
This feature integrates GPT-4 Turbo with Vision with Azure AI Search, so the app can index and search both the text and the images in your documents and answer questions over diverse document types. For a detailed guide on setup and usage, see the [Enabling GPT-4 Turbo with Vision](docs/gpt4v.md) page.
247+
244248
### Enabling authentication
245249

246250
By default, the deployed Azure web app will have no authentication or access restrictions enabled, meaning anyone with routable network access to the web app can chat with your indexed data. You can require authentication to your Azure Active Directory by following the [Add app authentication](https://learn.microsoft.com/azure/app-service/scenario-secure-app-authentication-app-service) tutorial and set it up against the deployed web app.

app/backend/app.py

Lines changed: 101 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1+
import dataclasses
12
import io
23
import json
34
import logging
45
import mimetypes
56
import os
67
from pathlib import Path
7-
from typing import AsyncGenerator
8+
from typing import AsyncGenerator, cast
89

910
from azure.core.exceptions import ResourceNotFoundError
1011
from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
12+
from azure.keyvault.secrets.aio import SecretClient
1113
from azure.monitor.opentelemetry import configure_azure_monitor
1214
from azure.search.documents.aio import SearchClient
1315
from azure.storage.blob.aio import BlobServiceClient
@@ -28,14 +30,22 @@
2830
)
2931
from quart_cors import cors
3032

33+
from approaches.approach import Approach
3134
from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
35+
from approaches.chatreadretrievereadvision import ChatReadRetrieveReadVisionApproach
3236
from approaches.retrievethenread import RetrieveThenReadApproach
37+
from approaches.retrievethenreadvision import RetrieveThenReadVisionApproach
3338
from core.authentication import AuthenticationHelper
3439

40+
CONFIG_OPENAI_TOKEN = "openai_token"
41+
CONFIG_CREDENTIAL = "azure_credential"
3542
CONFIG_ASK_APPROACH = "ask_approach"
43+
CONFIG_ASK_VISION_APPROACH = "ask_vision_approach"
44+
CONFIG_CHAT_VISION_APPROACH = "chat_vision_approach"
3645
CONFIG_CHAT_APPROACH = "chat_approach"
3746
CONFIG_BLOB_CONTAINER_CLIENT = "blob_container_client"
3847
CONFIG_AUTH_CLIENT = "auth_client"
48+
CONFIG_GPT4V_DEPLOYED = "gpt4v_deployed"
3949
CONFIG_SEARCH_CLIENT = "search_client"
4050
CONFIG_OPENAI_CLIENT = "openai_client"
4151
ERROR_MESSAGE = """The app encountered an error processing your request.
@@ -121,7 +131,12 @@ async def ask():
121131
auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
122132
context["auth_claims"] = await auth_helper.get_auth_claims_if_enabled(request.headers)
123133
try:
124-
approach = current_app.config[CONFIG_ASK_APPROACH]
134+
use_gpt4v = context.get("overrides", {}).get("use_gpt4v", False)
135+
approach: Approach
136+
if use_gpt4v and CONFIG_ASK_VISION_APPROACH in current_app.config:
137+
approach = cast(Approach, current_app.config[CONFIG_ASK_VISION_APPROACH])
138+
else:
139+
approach = cast(Approach, current_app.config[CONFIG_ASK_APPROACH])
125140
r = await approach.run(
126141
request_json["messages"], context=context, session_state=request_json.get("session_state")
127142
)
@@ -130,13 +145,20 @@ async def ask():
130145
return error_response(error, "/ask")
131146

132147

148+
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that also serializes dataclass instances.

    Dataclass instances are converted to plain dicts with
    ``dataclasses.asdict``; every other object is deferred to
    ``json.JSONEncoder.default``, which raises ``TypeError`` for values it
    cannot serialize.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for ``o``.

        Raises:
            TypeError: if ``o`` is neither a dataclass instance nor a type the
                base encoder can handle.
        """
        # dataclasses.is_dataclass() is also True for dataclass *classes*, on
        # which asdict() raises its own unrelated TypeError. Exclude types so
        # they fall through to the standard "not JSON serializable" error.
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)
        return super().default(o)
153+
154+
133155
async def format_as_ndjson(r: AsyncGenerator[dict, None]) -> AsyncGenerator[str, None]:
134156
try:
135157
async for event in r:
136-
yield json.dumps(event, ensure_ascii=False) + "\n"
137-
except Exception as e:
138-
logging.exception("Exception while generating response stream: %s", e)
139-
yield json.dumps(error_dict(e))
158+
yield json.dumps(event, ensure_ascii=False, cls=JSONEncoder) + "\n"
159+
except Exception as error:
160+
logging.exception("Exception while generating response stream: %s", error)
161+
yield json.dumps(error_dict(error))
140162

141163

142164
@bp.route("/chat", methods=["POST"])
@@ -147,8 +169,15 @@ async def chat():
147169
context = request_json.get("context", {})
148170
auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
149171
context["auth_claims"] = await auth_helper.get_auth_claims_if_enabled(request.headers)
172+
150173
try:
151-
approach = current_app.config[CONFIG_CHAT_APPROACH]
174+
use_gpt4v = context.get("overrides", {}).get("use_gpt4v", False)
175+
approach: Approach
176+
if use_gpt4v and CONFIG_CHAT_VISION_APPROACH in current_app.config:
177+
approach = cast(Approach, current_app.config[CONFIG_CHAT_VISION_APPROACH])
178+
else:
179+
approach = cast(Approach, current_app.config[CONFIG_CHAT_APPROACH])
180+
152181
result = await approach.run(
153182
request_json["messages"],
154183
stream=request_json.get("stream", False),
@@ -173,21 +202,31 @@ def auth_setup():
173202
return jsonify(auth_helper.get_auth_setup_for_client())
174203

175204

205+
@bp.route("/config", methods=["GET"])
def config():
    """Return the client-facing feature flags as JSON.

    The response has a single key, ``showGPT4VOptions``, whose value is read
    from the app config entry ``CONFIG_GPT4V_DEPLOYED``.
    """
    show_gpt4v_options = current_app.config[CONFIG_GPT4V_DEPLOYED]
    payload = {"showGPT4VOptions": show_gpt4v_options}
    return jsonify(payload)
208+
209+
176210
@bp.before_app_serving
177211
async def setup_clients():
178212
# Replace these with your own values, either in environment variables or directly here
179213
AZURE_STORAGE_ACCOUNT = os.environ["AZURE_STORAGE_ACCOUNT"]
180214
AZURE_STORAGE_CONTAINER = os.environ["AZURE_STORAGE_CONTAINER"]
181215
AZURE_SEARCH_SERVICE = os.environ["AZURE_SEARCH_SERVICE"]
182216
AZURE_SEARCH_INDEX = os.environ["AZURE_SEARCH_INDEX"]
217+
VISION_SECRET_NAME = os.getenv("VISION_SECRET_NAME")
218+
AZURE_KEY_VAULT_NAME = os.getenv("AZURE_KEY_VAULT_NAME")
183219
# Shared by all OpenAI deployments
184220
OPENAI_HOST = os.getenv("OPENAI_HOST", "azure")
185221
OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"]
186222
OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002")
187223
# Used with Azure OpenAI deployments
188224
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
225+
AZURE_OPENAI_GPT4V_DEPLOYMENT = os.environ.get("AZURE_OPENAI_GPT4V_DEPLOYMENT")
226+
AZURE_OPENAI_GPT4V_MODEL = os.environ.get("AZURE_OPENAI_GPT4V_MODEL")
189227
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") if OPENAI_HOST == "azure" else None
190228
AZURE_OPENAI_EMB_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") if OPENAI_HOST == "azure" else None
229+
AZURE_VISION_ENDPOINT = os.getenv("AZURE_VISION_ENDPOINT", "")
191230
# Used only with non-Azure OpenAI deployments
192231
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
193232
OPENAI_ORGANIZATION = os.getenv("OPENAI_ORGANIZATION")
@@ -204,6 +243,8 @@ async def setup_clients():
204243
AZURE_SEARCH_QUERY_LANGUAGE = os.getenv("AZURE_SEARCH_QUERY_LANGUAGE", "en-us")
205244
AZURE_SEARCH_QUERY_SPELLER = os.getenv("AZURE_SEARCH_QUERY_SPELLER", "lexicon")
206245

246+
USE_GPT4V = os.getenv("USE_GPT4V", "").lower() == "true"
247+
207248
# Use the current user identity to authenticate with Azure OpenAI, AI Search and Blob Storage (no secrets needed,
208249
# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the
209250
# keys for each service
@@ -231,6 +272,15 @@ async def setup_clients():
231272
)
232273
blob_container_client = blob_client.get_container_client(AZURE_STORAGE_CONTAINER)
233274

275+
vision_key = None
276+
if VISION_SECRET_NAME and AZURE_KEY_VAULT_NAME: # Cognitive vision keys are stored in keyvault
277+
key_vault_client = SecretClient(
278+
vault_url=f"https://{AZURE_KEY_VAULT_NAME}.vault.azure.net", credential=azure_credential
279+
)
280+
vision_secret = await key_vault_client.get_secret(VISION_SECRET_NAME)
281+
vision_key = vision_secret.value
282+
await key_vault_client.close()
283+
234284
# Used by the OpenAI SDK
235285
openai_client: AsyncOpenAI
236286

@@ -253,6 +303,8 @@ async def setup_clients():
253303
current_app.config[CONFIG_BLOB_CONTAINER_CLIENT] = blob_container_client
254304
current_app.config[CONFIG_AUTH_CLIENT] = auth_helper
255305

306+
current_app.config[CONFIG_GPT4V_DEPLOYED] = bool(USE_GPT4V)
307+
256308
# Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
257309
# or some derivative, here we include several for exploration purposes
258310
current_app.config[CONFIG_ASK_APPROACH] = RetrieveThenReadApproach(
@@ -268,6 +320,42 @@ async def setup_clients():
268320
query_speller=AZURE_SEARCH_QUERY_SPELLER,
269321
)
270322

323+
if AZURE_OPENAI_GPT4V_MODEL:
324+
if vision_key is None:
325+
raise ValueError("Vision key must be set (in Key Vault) to use the vision approach.")
326+
327+
current_app.config[CONFIG_ASK_VISION_APPROACH] = RetrieveThenReadVisionApproach(
328+
search_client=search_client,
329+
openai_client=openai_client,
330+
blob_container_client=blob_container_client,
331+
vision_endpoint=AZURE_VISION_ENDPOINT,
332+
vision_key=vision_key,
333+
gpt4v_deployment=AZURE_OPENAI_GPT4V_DEPLOYMENT,
334+
gpt4v_model=AZURE_OPENAI_GPT4V_MODEL,
335+
embedding_model=OPENAI_EMB_MODEL,
336+
embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
337+
sourcepage_field=KB_FIELDS_SOURCEPAGE,
338+
content_field=KB_FIELDS_CONTENT,
339+
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
340+
query_speller=AZURE_SEARCH_QUERY_SPELLER,
341+
)
342+
343+
current_app.config[CONFIG_CHAT_VISION_APPROACH] = ChatReadRetrieveReadVisionApproach(
344+
search_client=search_client,
345+
openai_client=openai_client,
346+
blob_container_client=blob_container_client,
347+
vision_endpoint=AZURE_VISION_ENDPOINT,
348+
vision_key=vision_key,
349+
gpt4v_deployment=AZURE_OPENAI_GPT4V_DEPLOYMENT,
350+
gpt4v_model=AZURE_OPENAI_GPT4V_MODEL,
351+
embedding_model=OPENAI_EMB_MODEL,
352+
embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
353+
sourcepage_field=KB_FIELDS_SOURCEPAGE,
354+
content_field=KB_FIELDS_CONTENT,
355+
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
356+
query_speller=AZURE_SEARCH_QUERY_SPELLER,
357+
)
358+
271359
current_app.config[CONFIG_CHAT_APPROACH] = ChatReadRetrieveReadApproach(
272360
search_client=search_client,
273361
openai_client=openai_client,
@@ -282,6 +370,12 @@ async def setup_clients():
282370
)
283371

284372

373+
@bp.after_app_serving
async def close_clients():
    """Close the clients stored in the app config once the app stops serving.

    Closes the search client and the blob container client registered under
    ``CONFIG_SEARCH_CLIENT`` and ``CONFIG_BLOB_CONTAINER_CLIENT``.
    """
    try:
        await current_app.config[CONFIG_SEARCH_CLIENT].close()
    finally:
        # Still close the blob container client if closing the search client
        # raised, so one failure does not leak the other client.
        await current_app.config[CONFIG_BLOB_CONTAINER_CLIENT].close()
377+
378+
285379
def create_app():
286380
app = Quart(__name__)
287381
app.register_blueprint(bp)

0 commit comments

Comments
 (0)