Skip to content

Commit 28536f6

Browse files
authored
Support use of AzureOpenAI proxy by prepdocs (#1760)
* Support custom URL for prepdocs
* update upload
* pwsh
* pwsh
* Move logic to embeddings
* Keep service variable
* Make mypy happy
* Fix tests
* Test coverage amount
* Update E2E
1 parent e505ab7 commit 28536f6

File tree

8 files changed

+42
-7
lines changed

8 files changed

+42
-7
lines changed

.github/workflows/python-test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ jobs:
5858
run: black . --check --verbose
5959
- name: Run Python tests
6060
if: runner.os != 'Windows'
61-
run: python3 -m pytest -s -vv --cov --cov-fail-under=87
61+
run: python3 -m pytest -s -vv --cov --cov-fail-under=86
6262
- name: Run E2E tests with Playwright
6363
id: e2e
6464
if: runner.os != 'Windows'

app/backend/app.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ async def setup_clients():
400400
os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None
401401
)
402402
AZURE_OPENAI_EMB_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT") if OPENAI_HOST.startswith("azure") else None
403+
AZURE_OPENAI_CUSTOM_URL = os.getenv("AZURE_OPENAI_CUSTOM_URL")
403404
AZURE_VISION_ENDPOINT = os.getenv("AZURE_VISION_ENDPOINT", "")
404405
# Used only with non-Azure OpenAI deployments
405406
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -499,6 +500,7 @@ async def setup_clients():
499500
openai_host=OPENAI_HOST,
500501
openai_model_name=OPENAI_EMB_MODEL,
501502
openai_service=AZURE_OPENAI_SERVICE,
503+
openai_custom_url=AZURE_OPENAI_CUSTOM_URL,
502504
openai_deployment=AZURE_OPENAI_EMB_DEPLOYMENT,
503505
openai_dimensions=OPENAI_EMB_DIMENSIONS,
504506
openai_key=clean_key_if_exists(OPENAI_API_KEY),
@@ -527,12 +529,14 @@ async def setup_clients():
527529

528530
if OPENAI_HOST.startswith("azure"):
529531
api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-03-01-preview"
530-
531532
if OPENAI_HOST == "azure_custom":
532-
endpoint = os.environ["AZURE_OPENAI_CUSTOM_URL"]
533+
if not AZURE_OPENAI_CUSTOM_URL:
534+
raise ValueError("AZURE_OPENAI_CUSTOM_URL must be set when OPENAI_HOST is azure_custom")
535+
endpoint = AZURE_OPENAI_CUSTOM_URL
533536
else:
537+
if not AZURE_OPENAI_SERVICE:
538+
raise ValueError("AZURE_OPENAI_SERVICE must be set when OPENAI_HOST is azure")
534539
endpoint = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
535-
536540
if api_key := os.getenv("AZURE_OPENAI_API_KEY"):
537541
openai_client = AsyncAzureOpenAI(api_version=api_version, azure_endpoint=endpoint, api_key=api_key)
538542
else:

app/backend/prepdocs.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ def setup_embeddings_service(
109109
openai_host: str,
110110
openai_model_name: str,
111111
openai_service: Union[str, None],
112+
openai_custom_url: Union[str, None],
112113
openai_deployment: Union[str, None],
113114
openai_dimensions: int,
114115
openai_key: Union[str, None],
@@ -126,6 +127,7 @@ def setup_embeddings_service(
126127
)
127128
return AzureOpenAIEmbeddingService(
128129
open_ai_service=openai_service,
130+
open_ai_custom_url=openai_custom_url,
129131
open_ai_deployment=openai_deployment,
130132
open_ai_model_name=openai_model_name,
131133
open_ai_dimensions=openai_dimensions,
@@ -309,10 +311,16 @@ async def main(strategy: Strategy, setup_index: bool = True):
309311
parser.add_argument(
310312
"--disablebatchvectors", action="store_true", help="Don't compute embeddings in batch for the sections"
311313
)
314+
315+
parser.add_argument(
316+
"--openaicustomurl",
317+
required=False,
318+
help="Optional. Use this custom OpenAI URL instead of the default OpenAI URL",
319+
)
312320
parser.add_argument(
313321
"--openaikey",
314322
required=False,
315-
help="Optional. Use this Azure OpenAI account key instead of the current user identity to login (use az login to set current user for Azure). This is required only when using non-Azure endpoints.",
323+
help="Optional. Use this OpenAI account key instead of the current Azure user identity to login.",
316324
)
317325
parser.add_argument("--openaiorg", required=False, help="This is required only when using non-Azure endpoints.")
318326
parser.add_argument(
@@ -419,6 +427,7 @@ async def main(strategy: Strategy, setup_index: bool = True):
419427
openai_host=args.openaihost,
420428
openai_model_name=args.openaimodelname,
421429
openai_service=args.openaiservice,
430+
openai_custom_url=args.openaicustomurl,
422431
openai_deployment=args.openaideployment,
423432
openai_dimensions=args.openaidimensions,
424433
openai_key=clean_key_if_exists(args.openaikey),

app/backend/prepdocslib/embeddings.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,17 @@ def __init__(
164164
open_ai_model_name: str,
165165
open_ai_dimensions: int,
166166
credential: Union[AsyncTokenCredential, AzureKeyCredential],
167+
open_ai_custom_url: Union[str, None] = None,
167168
disable_batch: bool = False,
168169
):
169170
super().__init__(open_ai_model_name, open_ai_dimensions, disable_batch)
170171
self.open_ai_service = open_ai_service
172+
if open_ai_service:
173+
self.open_ai_endpoint = f"https://{open_ai_service}.openai.azure.com"
174+
elif open_ai_custom_url:
175+
self.open_ai_endpoint = open_ai_custom_url
176+
else:
177+
raise ValueError("Either open_ai_service or open_ai_custom_url must be provided")
171178
self.open_ai_deployment = open_ai_deployment
172179
self.credential = credential
173180

@@ -187,7 +194,7 @@ class AuthArgs(TypedDict, total=False):
187194
raise TypeError("Invalid credential type")
188195

189196
return AsyncAzureOpenAI(
190-
azure_endpoint=f"https://{self.open_ai_service}.openai.azure.com",
197+
azure_endpoint=self.open_ai_endpoint,
191198
azure_deployment=self.open_ai_deployment,
192199
api_version="2023-05-15",
193200
**auth_args,

scripts/prepdocs.ps1

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,12 @@ if ($env:USE_FEATURE_INT_VECTORIZATION) {
6565
$integratedVectorizationArg = "--useintvectorization $env:USE_FEATURE_INT_VECTORIZATION"
6666
}
6767

68+
if ($env:AZURE_OPENAI_API_KEY) {
69+
$openaiApiKey = $env:AZURE_OPENAI_API_KEY
70+
} else {
71+
$openaiApiKey = $env:OPENAI_API_KEY
72+
}
73+
6874
$cwd = (Get-Location)
6975
$dataArg = "`"$cwd/data/*`""
7076

@@ -75,6 +81,7 @@ $argumentList = "./app/backend/prepdocs.py $dataArg --verbose " + `
7581
"$searchAnalyzerNameArg " + `
7682
"--openaihost `"$env:OPENAI_HOST`" --openaimodelname `"$env:AZURE_OPENAI_EMB_MODEL_NAME`" $openaiDimensionsArg " + `
7783
"--openaiservice `"$env:AZURE_OPENAI_SERVICE`" --openaideployment `"$env:AZURE_OPENAI_EMB_DEPLOYMENT`" " + `
84+
"--openaicustomurl `"$env:OPENAI_CUSTOM_URL`" " + `
7885
"--openaikey `"$env:OPENAI_API_KEY`" --openaiorg `"$env:OPENAI_ORGANIZATION`" " + `
7986
"--documentintelligenceservice $env:AZURE_DOCUMENTINTELLIGENCE_SERVICE " + `
8087
"$searchImagesArg $visionEndpointArg " + `

scripts/prepdocs.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@ if [ -n "$USE_FEATURE_INT_VECTORIZATION" ]; then
6363
integratedVectorizationArg="--useintvectorization $USE_FEATURE_INT_VECTORIZATION"
6464
fi
6565

66+
if [ -n "$AZURE_OPENAI_API_KEY" ]; then
67+
openAiApiKeyArg="$AZURE_OPENAI_API_KEY"
68+
elif [ -n "$OPENAI_API_KEY" ]; then
69+
openAiApiKeyArg="$OPENAI_API_KEY"
70+
fi
6671

6772
./.venv/bin/python ./app/backend/prepdocs.py './data/*' --verbose \
6873
--subscriptionid $AZURE_SUBSCRIPTION_ID \
@@ -71,7 +76,8 @@ fi
7176
$searchAnalyzerNameArg \
7277
--openaihost "$OPENAI_HOST" --openaimodelname "$AZURE_OPENAI_EMB_MODEL_NAME" $openAiDimensionsArg \
7378
--openaiservice "$AZURE_OPENAI_SERVICE" --openaideployment "$AZURE_OPENAI_EMB_DEPLOYMENT" \
74-
--openaikey "$OPENAI_API_KEY" --openaiorg "$OPENAI_ORGANIZATION" \
79+
--openaicustomurl "$AZURE_OPENAI_CUSTOM_URL" \
80+
--openaikey $openAiApiKeyArg --openaiorg "$OPENAI_ORGANIZATION" \
7581
--documentintelligenceservice "$AZURE_DOCUMENTINTELLIGENCE_SERVICE" \
7682
$searchImagesArg $visionEndpointArg \
7783
$adlsGen2StorageAccountArg $adlsGen2FilesystemArg $adlsGen2FilesystemPathArg \

tests/e2e.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def run_server(port: int):
5454
"AZURE_SEARCH_SERVICE": "test-search-service",
5555
"AZURE_SPEECH_SERVICE_ID": "test-id",
5656
"AZURE_SPEECH_SERVICE_LOCATION": "eastus",
57+
"AZURE_OPENAI_SERVICE": "test-openai-service",
5758
"AZURE_OPENAI_CHATGPT_MODEL": "gpt-35-turbo",
5859
},
5960
clear=True,

tests/test_app_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def minimal_env(monkeypatch):
1313
monkeypatch.setenv("AZURE_STORAGE_CONTAINER", "test-storage-container")
1414
monkeypatch.setenv("AZURE_SEARCH_INDEX", "test-search-index")
1515
monkeypatch.setenv("AZURE_SEARCH_SERVICE", "test-search-service")
16+
monkeypatch.setenv("AZURE_OPENAI_SERVICE", "test-openai-service")
1617
monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "gpt-35-turbo")
1718
yield
1819

0 commit comments

Comments
 (0)