Skip to content

Commit 7a7881e

Browse files
authored
Add support for using new ada models with different dimensions (#1378)
* update reqs * Add parameters for ada 3 * Update readme * Update TOC * Fix tests and mocks * Mypy fixes * Addressing feedback (more testing still needed) * More readme notes * Cast openaidimensions to int * Support batch for ada3 * right model name * Note about regions * Add constants for tests model name and dimensions * Default to int * Typing error * Fix env var name * Undo unneeded parameter move * Dont specify dimensons for old models * typing
1 parent 767f81d commit 7a7881e

22 files changed

+243
-45
lines changed

.azdo/pipelines/azure-dev.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ steps:
7070
AZURE_OPENAI_EMB_DEPLOYMENT: $(AZURE_OPENAI_EMB_DEPLOYMENT)
7171
AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY: $(AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY)
7272
AZURE_OPENAI_EMB_DEPLOYMENT_VERSION: $(AZURE_OPENAI_EMB_DEPLOYMENT_VERSION)
73+
AZURE_OPENAI_EMB_DIMENSIONS: $(AZURE_OPENAI_EMB_DIMENSIONS)
7374
OPENAI_HOST: $(OPENAI_HOST)
7475
OPENAI_API_KEY: $(OPENAI_API_KEY)
7576
OPENAI_ORGANIZATION: $(OPENAI_ORGANIZATION)

.github/workflows/azure-dev.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ jobs:
6161
AZURE_OPENAI_EMB_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT }}
6262
AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY }}
6363
AZURE_OPENAI_EMB_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT_VERSION }}
64+
AZURE_OPENAI_EMB_DIMENSIONS: ${{ vars.AZURE_OPENAI_EMB_DIMENSIONS }}
6465
OPENAI_HOST: ${{ vars.OPENAI_HOST }}
6566
OPENAI_API_KEY: ${{ vars.OPENAI_API_KEY }}
6667
OPENAI_ORGANIZATION: ${{ vars.OPENAI_ORGANIZATION }}

app/backend/app.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ async def setup_clients():
222222
OPENAI_HOST = os.getenv("OPENAI_HOST", "azure")
223223
OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"]
224224
OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002")
225+
OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS", 1536))
225226
# Used with Azure OpenAI deployments
226227
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
227228
AZURE_OPENAI_GPT4V_DEPLOYMENT = os.environ.get("AZURE_OPENAI_GPT4V_DEPLOYMENT")
@@ -345,6 +346,7 @@ async def setup_clients():
345346
chatgpt_deployment=AZURE_OPENAI_CHATGPT_DEPLOYMENT,
346347
embedding_model=OPENAI_EMB_MODEL,
347348
embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
349+
embedding_dimensions=OPENAI_EMB_DIMENSIONS,
348350
sourcepage_field=KB_FIELDS_SOURCEPAGE,
349351
content_field=KB_FIELDS_CONTENT,
350352
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
@@ -365,6 +367,7 @@ async def setup_clients():
365367
gpt4v_model=AZURE_OPENAI_GPT4V_MODEL,
366368
embedding_model=OPENAI_EMB_MODEL,
367369
embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
370+
embedding_dimensions=OPENAI_EMB_DIMENSIONS,
368371
sourcepage_field=KB_FIELDS_SOURCEPAGE,
369372
content_field=KB_FIELDS_CONTENT,
370373
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
@@ -382,6 +385,7 @@ async def setup_clients():
382385
gpt4v_model=AZURE_OPENAI_GPT4V_MODEL,
383386
embedding_model=OPENAI_EMB_MODEL,
384387
embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
388+
embedding_dimensions=OPENAI_EMB_DIMENSIONS,
385389
sourcepage_field=KB_FIELDS_SOURCEPAGE,
386390
content_field=KB_FIELDS_CONTENT,
387391
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
@@ -396,6 +400,7 @@ async def setup_clients():
396400
chatgpt_deployment=AZURE_OPENAI_CHATGPT_DEPLOYMENT,
397401
embedding_model=OPENAI_EMB_MODEL,
398402
embedding_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
403+
embedding_dimensions=OPENAI_EMB_DIMENSIONS,
399404
sourcepage_field=KB_FIELDS_SOURCEPAGE,
400405
content_field=KB_FIELDS_CONTENT,
401406
query_language=AZURE_SEARCH_QUERY_LANGUAGE,

app/backend/approaches/approach.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
import os
22
from abc import ABC
33
from dataclasses import dataclass
4-
from typing import Any, AsyncGenerator, Awaitable, Callable, List, Optional, Union, cast
4+
from typing import (
5+
Any,
6+
AsyncGenerator,
7+
Awaitable,
8+
Callable,
9+
List,
10+
Optional,
11+
TypedDict,
12+
Union,
13+
cast,
14+
)
515
from urllib.parse import urljoin
616

717
import aiohttp
@@ -90,6 +100,7 @@ def __init__(
90100
query_speller: Optional[str],
91101
embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text"
92102
embedding_model: str,
103+
embedding_dimensions: int,
93104
openai_host: str,
94105
vision_endpoint: str,
95106
vision_token_provider: Callable[[], Awaitable[str]],
@@ -101,6 +112,7 @@ def __init__(
101112
self.query_speller = query_speller
102113
self.embedding_deployment = embedding_deployment
103114
self.embedding_model = embedding_model
115+
self.embedding_dimensions = embedding_dimensions
104116
self.openai_host = openai_host
105117
self.vision_endpoint = vision_endpoint
106118
self.vision_token_provider = vision_token_provider
@@ -204,10 +216,23 @@ def get_citation(self, sourcepage: str, use_image_citation: bool) -> str:
204216
return sourcepage
205217

206218
async def compute_text_embedding(self, q: str):
219+
SUPPORTED_DIMENSIONS_MODEL = {
220+
"text-embedding-ada-002": False,
221+
"text-embedding-3-small": True,
222+
"text-embedding-3-large": True,
223+
}
224+
225+
class ExtraArgs(TypedDict, total=False):
226+
dimensions: int
227+
228+
dimensions_args: ExtraArgs = (
229+
{"dimensions": self.embedding_dimensions} if SUPPORTED_DIMENSIONS_MODEL[self.embedding_model] else {}
230+
)
207231
embedding = await self.openai_client.embeddings.create(
208232
# Azure OpenAI takes the deployment name as the model name
209233
model=self.embedding_deployment if self.embedding_deployment else self.embedding_model,
210234
input=q,
235+
**dimensions_args,
211236
)
212237
query_vector = embedding.data[0].embedding
213238
return VectorizedQuery(vector=query_vector, k_nearest_neighbors=50, fields="embedding")

app/backend/approaches/chatreadretrieveread.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def __init__(
3232
chatgpt_deployment: Optional[str], # Not needed for non-Azure OpenAI
3333
embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text"
3434
embedding_model: str,
35+
embedding_dimensions: int,
3536
sourcepage_field: str,
3637
content_field: str,
3738
query_language: str,
@@ -44,6 +45,7 @@ def __init__(
4445
self.chatgpt_deployment = chatgpt_deployment
4546
self.embedding_deployment = embedding_deployment
4647
self.embedding_model = embedding_model
48+
self.embedding_dimensions = embedding_dimensions
4749
self.sourcepage_field = sourcepage_field
4850
self.content_field = content_field
4951
self.query_language = query_language

app/backend/approaches/chatreadretrievereadvision.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def __init__(
3535
gpt4v_model: str,
3636
embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text"
3737
embedding_model: str,
38+
embedding_dimensions: int,
3839
sourcepage_field: str,
3940
content_field: str,
4041
query_language: str,
@@ -50,6 +51,7 @@ def __init__(
5051
self.gpt4v_model = gpt4v_model
5152
self.embedding_deployment = embedding_deployment
5253
self.embedding_model = embedding_model
54+
self.embedding_dimensions = embedding_dimensions
5355
self.sourcepage_field = sourcepage_field
5456
self.content_field = content_field
5557
self.query_language = query_language

app/backend/approaches/retrievethenread.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def __init__(
5252
chatgpt_deployment: Optional[str], # Not needed for non-Azure OpenAI
5353
embedding_model: str,
5454
embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text"
55+
embedding_dimensions: int,
5556
sourcepage_field: str,
5657
content_field: str,
5758
query_language: str,
@@ -63,6 +64,7 @@ def __init__(
6364
self.auth_helper = auth_helper
6465
self.chatgpt_model = chatgpt_model
6566
self.embedding_model = embedding_model
67+
self.embedding_dimensions = embedding_dimensions
6668
self.chatgpt_deployment = chatgpt_deployment
6769
self.embedding_deployment = embedding_deployment
6870
self.sourcepage_field = sourcepage_field

app/backend/approaches/retrievethenreadvision.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def __init__(
4848
gpt4v_model: str,
4949
embedding_deployment: Optional[str], # Not needed for non-Azure OpenAI or for retrieval_mode="text"
5050
embedding_model: str,
51+
embedding_dimensions: int,
5152
sourcepage_field: str,
5253
content_field: str,
5354
query_language: str,
@@ -61,6 +62,7 @@ def __init__(
6162
self.auth_helper = auth_helper
6263
self.embedding_model = embedding_model
6364
self.embedding_deployment = embedding_deployment
65+
self.embedding_dimensions = embedding_dimensions
6466
self.sourcepage_field = sourcepage_field
6567
self.content_field = content_field
6668
self.gpt4v_deployment = gpt4v_deployment

azure.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ pipeline:
5151
- AZURE_OPENAI_EMB_DEPLOYMENT
5252
- AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY
5353
- AZURE_OPENAI_EMB_DEPLOYMENT_VERSION
54+
- AZURE_OPENAI_EMB_DIMENSIONS
5455
- OPENAI_HOST
5556
- OPENAI_API_KEY
5657
- OPENAI_ORGANIZATION

docs/deploy_features.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ This document covers optional features that can be enabled in the deployed Azure
55
You should typically enable these features before running `azd up`. Once you've set them, return to the [deployment steps](../README.md#deploying).
66

77
* [Using GPT-4](#using-gpt-4)
8+
* [Using text-embedding-3 models](#using-text-embedding-3-models)
89
* [Enabling GPT-4 Turbo with Vision](#enabling-gpt-4-turbo-with-vision)
910
* [Enabling Integrated Vectorization](#enabling-integrated-vectorization)
1011
* [Enabling authentication](#enabling-authentication)
@@ -58,6 +59,43 @@ Execute the following commands inside your terminal:
5859
>
5960
> Note that this does not delete your GPT-4 deployment; it just makes your application create a new or reuse an old GPT 3.5 deployment. If you want to delete it, you can go to your Azure OpenAI studio and do so.
6061

62+
## Using text-embedding-3 models
63+
64+
By default, the deployed Azure web app uses the `text-embedding-ada-002` embedding model. If you want to use one of the text-embedding-3 models, you can do so by following these steps:
65+
66+
1. Run one of the following commands to set the desired model:
67+
68+
```shell
69+
azd env set AZURE_OPENAI_EMB_MODEL_NAME text-embedding-3-small
70+
```
71+
72+
```shell
73+
azd env set AZURE_OPENAI_EMB_MODEL_NAME text-embedding-3-large
74+
```
75+
76+
2. Specify the desired dimensions of the model (from 256 to 3072, depending on the model):
77+
78+
```shell
79+
azd env set AZURE_OPENAI_EMB_DIMENSIONS 256
80+
```
81+
82+
3. Set the model version to "1" (the only version as of March 2024):
83+
84+
```shell
85+
azd env set AZURE_OPENAI_EMB_DEPLOYMENT_VERSION 1
86+
```
87+
88+
4. When prompted during `azd up`, make sure to select a region for the OpenAI resource group location that supports the text-embedding-3 models. There are [limited regions available](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#embeddings-models).
89+
90+
If you have already deployed:
91+
92+
* You'll need to change the deployment name by running `azd env set AZURE_OPENAI_EMB_DEPLOYMENT <new-deployment-name>`
93+
* You'll need to create a new index, and re-index all of the data using the new model. You can either delete the current index in the Azure Portal, or create an index with a different name by running `azd env set AZURE_SEARCH_INDEX new-index-name`. When you next run `azd up`, the new index will be created and the data will be re-indexed.
94+
* If your OpenAI resource is not in one of the supported regions, you should delete `openAiResourceGroupLocation` from `.azure/YOUR-ENV-NAME/config.json`. When running `azd up`, you will be prompted to select a new region.
95+
96+
> [!NOTE]
97+
> The text-embedding-3 models are not currently supported by the integrated vectorization feature.
98+
6199
## Enabling GPT-4 Turbo with Vision
62100

63101
This section covers the integration of GPT-4 Vision with Azure AI Search. Learn how to enhance your search capabilities with the power of image and text indexing, enabling advanced search functionalities over diverse document types. For a detailed guide on setup and usage, visit our [Enabling GPT-4 Turbo with Vision](./gpt4v.md) page.
@@ -73,6 +111,8 @@ To enable integrated vectorization with this sample:
73111
3. Run `azd up` to update system and user roles
74112
4. You can view the resources such as the indexer and skillset in Azure Portal and monitor the status of the vectorization process.
75113
114+
This feature is not currently compatible with GPT-4 Turbo with Vision or the newer text-embedding-3 models.
115+
76116
## Enabling authentication
77117
78118
By default, the deployed Azure web app will have no authentication or access restrictions enabled, meaning anyone with routable network access to the web app can chat with your indexed data. If you'd like to automatically setup authentication and user login as part of the `azd up` process, see [this guide](./login_and_acl.md).

0 commit comments

Comments
 (0)