
Commit eb43f1f

feedback changes v1

1 parent ee59a1c commit eb43f1f

4 files changed: +87 additions, -75 deletions

infra/scripts/index_scripts/03_cu_process_data_text.py

Lines changed: 24 additions & 24 deletions
@@ -5,14 +5,14 @@
 import pyodbc
 import pandas as pd
 from datetime import datetime, timedelta
+from urllib.parse import urlparse
 from azure.identity import get_bearer_token_provider
 from azure.keyvault.secrets import SecretClient
 from azure.search.documents import SearchClient
 from azure.search.documents.indexes import SearchIndexClient
 from azure.storage.filedatalake import DataLakeServiceClient
-# --- REPLACED: from openai import AzureOpenAI
-from azure.ai.inference import ChatCompletionsClient, EmbeddingsClient  # Azure AI Foundry (Inference) SDK
-# ---------------------------------------------
+from azure.ai.inference import ChatCompletionsClient, EmbeddingsClient
+from azure.ai.inference.models import SystemMessage, UserMessage
 from content_understanding_client import AzureContentUnderstandingClient
 from azure_credential_utils import get_azure_credential

@@ -31,9 +31,10 @@ def get_secrets_from_kv(kv_name, secret_name):

 # Retrieve secrets
 search_endpoint = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-SEARCH-ENDPOINT")
-openai_api_base = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-ENDPOINT")
+ai_project_endpoint = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-AI-AGENT-ENDPOINT")
 openai_api_version = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-PREVIEW-API-VERSION")
 deployment = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-DEPLOYMENT-MODEL")
+embedding_deployment = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-EMBEDDING-MODEL")
 account_name = get_secrets_from_kv(KEY_VAULT_NAME, "ADLS-ACCOUNT-NAME")
 server = get_secrets_from_kv(KEY_VAULT_NAME, "SQLDB-SERVER")
 database = get_secrets_from_kv(KEY_VAULT_NAME, "SQLDB-DATABASE")

@@ -79,32 +80,29 @@ def get_secrets_from_kv(kv_name, secret_name):
 print("Content Understanding client initialized.")

 # ---------- Azure AI Foundry (Inference) clients (Managed Identity) ----------
-# For Azure OpenAI endpoints, the Inference SDK expects the deployment path and api_version + scopes.
-# chat deployment (already coming from Key Vault as `deployment`)
-chat_endpoint = f"{openai_api_base}/openai/deployments/{deployment}"
+# Project endpoint has the form: https://your-ai-services-account-name.services.ai.azure.com/api/projects/your-project-name
+# Inference endpoint has the form: https://your-ai-services-account-name.services.ai.azure.com/models
+# Strip the "/api/projects/your-project-name" part and replace with "/models":
+inference_endpoint = f"https://{urlparse(ai_project_endpoint).netloc}/models"
+
 chat_client = ChatCompletionsClient(
-    endpoint=chat_endpoint,
+    endpoint=inference_endpoint,
     credential=credential,
-    credential_scopes=["https://cognitiveservices.azure.com/.default"],
-    api_version=openai_api_version,
+    credential_scopes=["https://ai.azure.com/.default"],
 )
-# embedding deployment name (assumes you deployed with the name below — change if different)
-embedding_deployment = "text-embedding-ada-002"
-embeddings_endpoint = f"{openai_api_base}/openai/deployments/{embedding_deployment}"
+
 embeddings_client = EmbeddingsClient(
-    endpoint=embeddings_endpoint,
+    endpoint=inference_endpoint,
     credential=credential,
-    credential_scopes=["https://cognitiveservices.azure.com/.default"],
-    api_version=openai_api_version,
+    credential_scopes=["https://ai.azure.com/.default"],
 )
 # -----------------------------------------------------------------------------

 # Utility functions
 def get_embeddings(text: str):
-    # Uses Azure AI Inference EmbeddingsClient; returns the vector for `text`.
-    # NOTE: Endpoint includes the AOAI deployment name.
+    # Uses Azure AI Inference EmbeddingsClient with the AI Foundry project inference endpoint.
     try:
-        resp = embeddings_client.embed(input=[text])
+        resp = embeddings_client.embed(model=embedding_deployment, input=[text])
         return resp.data[0].embedding
     except Exception as e:
         print(f"Error getting embeddings: {e}")

@@ -298,11 +296,12 @@ def call_gpt4(topics_str1, client):
     Return the topics and their labels in JSON format.Always add 'topics' node and 'label', 'description' attributes in json.
     Do not return anything else.
     """
-    # Inference client: Chat completions
+    # Inference client: Chat completions with model deployment name
     response = client.complete(
+        model=deployment,
         messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": topic_prompt},
+            SystemMessage(content="You are a helpful assistant."),
+            UserMessage(content=topic_prompt),
         ],
         temperature=0,
     )

@@ -331,9 +330,10 @@ def get_mined_topic_mapping(input_text, list_of_topics):
     from a list of topics - {list_of_topics}.
     ALWAYS only return a topic from list - {list_of_topics}. Do not add any other text.'''
     response = chat_client.complete(
+        model=deployment,
         messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": prompt},
+            SystemMessage(content="You are a helpful assistant."),
+            UserMessage(content=prompt),
         ],
         temperature=0,
     )
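
Note on the pattern introduced in this script (and repeated in the two scripts below): instead of per-deployment Azure OpenAI endpoints, the commit derives one Foundry inference endpoint from the project endpoint and passes the deployment name on each call. A minimal standalone sketch of that wiring, with an illustrative endpoint value and DefaultAzureCredential standing in for the repo's get_azure_credential helper:

# Sketch only: example endpoint and credential; the scripts read these from Key Vault.
from urllib.parse import urlparse
from azure.ai.inference import ChatCompletionsClient, EmbeddingsClient
from azure.identity import DefaultAzureCredential

project_endpoint = "https://my-account.services.ai.azure.com/api/projects/my-project"  # example value
credential = DefaultAzureCredential()

# Keep the host, swap the /api/projects/... path for /models.
inference_endpoint = f"https://{urlparse(project_endpoint).netloc}/models"

chat_client = ChatCompletionsClient(
    endpoint=inference_endpoint,
    credential=credential,
    credential_scopes=["https://ai.azure.com/.default"],
)
embeddings_client = EmbeddingsClient(
    endpoint=inference_endpoint,
    credential=credential,
    credential_scopes=["https://ai.azure.com/.default"],
)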

infra/scripts/index_scripts/03_cu_process_data_text_manual.py

Lines changed: 26 additions & 23 deletions
@@ -5,12 +5,14 @@
 import pyodbc
 import pandas as pd
 from datetime import datetime, timedelta
+from urllib.parse import urlparse
 from azure.identity import get_bearer_token_provider
 from azure.keyvault.secrets import SecretClient
 from azure.search.documents import SearchClient
 from azure.search.documents.indexes import SearchIndexClient
 from azure.storage.filedatalake import DataLakeServiceClient
 from azure.ai.inference import ChatCompletionsClient, EmbeddingsClient
+from azure.ai.inference.models import SystemMessage, UserMessage
 from content_understanding_client import AzureContentUnderstandingClient
 from azure_credential_utils import get_azure_credential

@@ -31,14 +33,16 @@ def get_secrets_from_kv(kv_name, secret_name):

 # Retrieve secrets
 search_endpoint = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-SEARCH-ENDPOINT")
-openai_api_base = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-ENDPOINT")
+ai_project_endpoint = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-AI-AGENT-ENDPOINT")
 openai_api_version = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-PREVIEW-API-VERSION")
 deployment = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-DEPLOYMENT-MODEL")
+embedding_deployment = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-EMBEDDING-MODEL")
 account_name = get_secrets_from_kv(KEY_VAULT_NAME, "ADLS-ACCOUNT-NAME")
 server = get_secrets_from_kv(KEY_VAULT_NAME, "SQLDB-SERVER")
 database = get_secrets_from_kv(KEY_VAULT_NAME, "SQLDB-DATABASE")
 azure_ai_endpoint = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-CU-ENDPOINT")
 azure_ai_api_version = "2024-12-01-preview"
+azure_ai_api_version = "2024-12-01-preview"
 print("Secrets retrieved.")

 # Azure DataLake setup

@@ -80,33 +84,30 @@ def get_secrets_from_kv(kv_name, secret_name):


 # ---------- Azure AI Foundry (Inference) clients (Managed Identity) ----------
-# For Azure OpenAI endpoints, the Inference SDK expects the deployment path and api_version + scopes.
-# chat deployment (already coming from Key Vault as `deployment`)
-chat_endpoint = f"{openai_api_base}/openai/deployments/{deployment}"
+# Project endpoint has the form: https://your-ai-services-account-name.services.ai.azure.com/api/projects/your-project-name
+# Inference endpoint has the form: https://your-ai-services-account-name.services.ai.azure.com/models
+# Strip the "/api/projects/your-project-name" part and replace with "/models":
+inference_endpoint = f"https://{urlparse(ai_project_endpoint).netloc}/models"
+
 chat_client = ChatCompletionsClient(
-    endpoint=chat_endpoint,
+    endpoint=inference_endpoint,
     credential=credential,
-    credential_scopes=["https://cognitiveservices.azure.com/.default"],
-    api_version=openai_api_version,
+    credential_scopes=["https://ai.azure.com/.default"],
 )
-# embedding deployment name (assumes you deployed with the name below — change if different)
-embedding_deployment = "text-embedding-ada-002"
-embeddings_endpoint = f"{openai_api_base}/openai/deployments/{embedding_deployment}"
+
 embeddings_client = EmbeddingsClient(
-    endpoint=embeddings_endpoint,
+    endpoint=inference_endpoint,
     credential=credential,
-    credential_scopes=["https://cognitiveservices.azure.com/.default"],
-    api_version=openai_api_version,
+    credential_scopes=["https://ai.azure.com/.default"],
 )
 # -----------------------------------------------------------------------------


 # Utility functions
 def get_embeddings(text: str):
-    # Uses Azure AI Inference EmbeddingsClient; returns the vector for `text`.
-    # NOTE: Endpoint includes the AOAI deployment name.
+    # Uses Azure AI Inference EmbeddingsClient with the AI Foundry project inference endpoint.
     try:
-        resp = embeddings_client.embed(input=[text])
+        resp = embeddings_client.embed(model=embedding_deployment, input=[text])
         return resp.data[0].embedding
     except Exception as e:
         print(f"Error getting embeddings: {e}")

@@ -159,11 +160,11 @@ def prepare_search_doc(content, document_id, path_name):
     for idx, chunk in enumerate(chunks, 1):
         chunk_id = f"{document_id}_{str(idx).zfill(2)}"
         try:
-            v_contentVector = get_embeddings(str(chunk),openai_api_base,openai_api_version)
+            v_contentVector = get_embeddings(str(chunk))
         except:
             time.sleep(30)
             try:
-                v_contentVector = get_embeddings(str(chunk),openai_api_base,openai_api_version)
+                v_contentVector = get_embeddings(str(chunk))
             except:
                 v_contentVector = []
         docs.append({

@@ -300,11 +301,12 @@ def call_gpt4(topics_str1, client):
     Return the topics and their labels in JSON format.Always add 'topics' node and 'label', 'description' attributes in json.
     Do not return anything else.
     """
-    # Inference client: Chat completions
+    # Inference client: Chat completions with model deployment name
     response = client.complete(
+        model=deployment,
         messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": topic_prompt},
+            SystemMessage(content="You are a helpful assistant."),
+            UserMessage(content=topic_prompt),
         ],
         temperature=0,
     )

@@ -332,9 +334,10 @@ def get_mined_topic_mapping(input_text, list_of_topics):
     from a list of topics - {list_of_topics}.
     ALWAYS only return a topic from list - {list_of_topics}. Do not add any other text.'''
     response = chat_client.complete(
+        model=deployment,
         messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": prompt},
+            SystemMessage(content="You are a helpful assistant."),
+            UserMessage(content=prompt),
         ],
         temperature=0,
     )
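
The updated prepare_search_doc keeps the existing retry shape: one retry after a fixed 30-second sleep, falling back to an empty vector. A standalone sketch of that behaviour; embed_with_retry and embed_fn are illustrative names, not part of the repo:

# Sketch only: illustrates the retry/backoff shape used in prepare_search_doc.
import time

def embed_with_retry(chunk, embed_fn, backoff_seconds=30):
    """Return the embedding for `chunk`, retrying once before giving up."""
    try:
        return embed_fn(str(chunk))
    except Exception as first_error:
        print(f"Error getting embeddings on first try: {first_error}")
        time.sleep(backoff_seconds)
        try:
            return embed_fn(str(chunk))
        except Exception as second_error:
            print(f"Error getting embeddings: {second_error}")
            return []  # the scripts index an empty contentVector when both attempts fail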

infra/scripts/index_scripts/04_cu_process_data_new_data.py

Lines changed: 37 additions & 27 deletions
@@ -5,12 +5,14 @@
 import pyodbc
 import pandas as pd
 from datetime import datetime, timedelta
+from urllib.parse import urlparse
 from azure.identity import get_bearer_token_provider
 from azure.keyvault.secrets import SecretClient
 from azure.search.documents import SearchClient
 from azure.search.documents.indexes import SearchIndexClient
 from azure.storage.filedatalake import DataLakeServiceClient
 from azure.ai.inference import ChatCompletionsClient, EmbeddingsClient
+from azure.ai.inference.models import SystemMessage, UserMessage
 from content_understanding_client import AzureContentUnderstandingClient
 from azure_credential_utils import get_azure_credential
 from azure.search.documents.indexes.models import (

@@ -43,8 +45,7 @@ def get_secrets_from_kv(kv_name, secret_name):

 # Retrieve secrets
 search_endpoint = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-SEARCH-ENDPOINT")
-openai_api_base = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-ENDPOINT")
-openai_api_version = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-PREVIEW-API-VERSION")
+ai_project_endpoint = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-AI-AGENT-ENDPOINT")
 deployment = get_secrets_from_kv(KEY_VAULT_NAME, "AZURE-OPENAI-DEPLOYMENT-MODEL")
 account_name = get_secrets_from_kv(KEY_VAULT_NAME, "ADLS-ACCOUNT-NAME")
 server = get_secrets_from_kv(KEY_VAULT_NAME, "SQLDB-SERVER")

@@ -70,14 +71,21 @@ def get_secrets_from_kv(kv_name, secret_name):
 print("Azure Search setup complete.")

 # ---------- Azure AI Foundry (Inference) clients (Managed Identity) ----------
-# For Azure OpenAI endpoints, the Inference SDK expects the deployment path and api_version + scopes.
-# chat deployment (already coming from Key Vault as `deployment`)
-chat_endpoint = f"{openai_api_base}/openai/deployments/{deployment}"
+# Project endpoint has the form: https://your-ai-services-account-name.services.ai.azure.com/api/projects/your-project-name
+# Inference endpoint has the form: https://your-ai-services-account-name.services.ai.azure.com/models
+# Strip the "/api/projects/your-project-name" part and replace with "/models":
+inference_endpoint = f"https://{urlparse(ai_project_endpoint).netloc}/models"
+
 chat_client = ChatCompletionsClient(
-    endpoint=chat_endpoint,
+    endpoint=inference_endpoint,
+    credential=credential,
+    credential_scopes=["https://ai.azure.com/.default"],
+)
+
+embeddings_client = EmbeddingsClient(
+    endpoint=inference_endpoint,
     credential=credential,
-    credential_scopes=["https://cognitiveservices.azure.com/.default"],
-    api_version=openai_api_version,
+    credential_scopes=["https://ai.azure.com/.default"],
 )

 # Delete the search index

@@ -125,7 +133,7 @@ def create_search_index():
             vectorizer_name="myOpenAI",
             kind="azureOpenAI",
             parameters=AzureOpenAIVectorizerParameters(
-                resource_url=openai_api_base,
+                resource_url=ai_project_endpoint,
                 deployment_name=embedding_model,
                 model_name=embedding_model
             )

@@ -178,16 +186,14 @@ def create_search_index():
 print("Content Understanding client initialized.")

 # Utility functions
-def get_embeddings(text: str, openai_api_base, openai_api_version):
-    embeddings_endpoint = f"{openai_api_base}/openai/deployments/{embedding_model}"
-    embeddings_client = EmbeddingsClient(
-        endpoint=embeddings_endpoint,
-        credential=credential,
-        credential_scopes=["https://cognitiveservices.azure.com/.default"],
-        api_version=openai_api_version
-    )
-    response = embeddings_client.embed(input=[text])
-    return response.data[0].embedding
+def get_embeddings(text: str):
+    # Uses Azure AI Inference EmbeddingsClient with the AI Foundry project inference endpoint.
+    try:
+        resp = embeddings_client.embed(model=embedding_model, input=[text])
+        return resp.data[0].embedding
+    except Exception as e:
+        print(f"Error getting embeddings: {e}")
+        raise
 # --------------------------------------------------------------------------

 def clean_spaces_with_regex(text):

@@ -217,12 +223,14 @@ def prepare_search_doc(content, document_id, path_name):
     for idx, chunk in enumerate(chunks, 1):
         chunk_id = f"{document_id}_{str(idx).zfill(2)}"
         try:
-            v_contentVector = get_embeddings(str(chunk),openai_api_base,openai_api_version)
-        except:
+            v_contentVector = get_embeddings(str(chunk))
+        except Exception as e:
+            print(f"Error getting embeddings on first try: {e}")
             time.sleep(30)
             try:
-                v_contentVector = get_embeddings(str(chunk),openai_api_base,openai_api_version)
-            except:
+                v_contentVector = get_embeddings(str(chunk))
+            except Exception as e:
+                print(f"Error getting embeddings: {e}")
                 v_contentVector = []
         docs.append({
             "id": chunk_id,

@@ -402,9 +410,10 @@ def call_gpt4(topics_str1, client):
     Do not return anything else.
     """
     response = client.complete(
+        model=deployment,
         messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": topic_prompt},
+            SystemMessage(content="You are a helpful assistant."),
+            UserMessage(content=topic_prompt),
         ],
         temperature=0,
     )

@@ -431,9 +440,10 @@ def get_mined_topic_mapping(input_text, list_of_topics):
     from a list of topics - {list_of_topics}.
     ALWAYS only return a topic from list - {list_of_topics}. Do not add any other text.'''
     response = chat_client.complete(
+        model=deployment,
         messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": prompt},
+            SystemMessage(content="You are a helpful assistant."),
+            UserMessage(content=prompt),
         ],
         temperature=0,
    )
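
All of the chat calls in this commit follow the same shape: typed SystemMessage/UserMessage objects plus an explicit model= deployment name, since the shared /models endpoint no longer identifies a single deployment. A short usage sketch; the prompt text is illustrative, and chat_client and deployment are as constructed in the scripts above:

# Sketch only: example prompt; reuses chat_client and deployment from the scripts.
from azure.ai.inference.models import SystemMessage, UserMessage

response = chat_client.complete(
    model=deployment,  # deployment name must now be passed per call
    messages=[
        SystemMessage(content="You are a helpful assistant."),
        UserMessage(content="Label these topics and return JSON."),
    ],
    temperature=0,
)
print(response.choices[0].message.content)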

src/api/services/history_service.py

Lines changed: 0 additions & 1 deletion
@@ -73,7 +73,6 @@ async def generate_title(self, conversation_messages):
         final_prompt = f"{combined_content}\n\n{title_prompt}"

         try:
-            logger.info('Testing R01')
             project_client = AIProjectClient(
                 endpoint=self.ai_project_endpoint,
                 credential=get_azure_credential(client_id=self.azure_client_id),
