chore: update default OpenAI model to gpt-4.1 across codebase and infra (#1822)

Harmanpreet-Microsoft · Harmanpreet Kaur · web-flow · commit f075a9b6d111 · 2025-06-10T12:48:05.000+05:30
Co-authored-by: Harmanpreet Kaur <v-harmanpkau@microsoft.com>
diff --git a/README.md b/README.md
@@ -194,10 +194,10 @@ Select either "PostgreSQL" or "Cosmos DB":
 
 When Deployment is complete, follow steps in [Set Up Authentication in Azure App Service](./docs/azure_app_service_auth_setup.md) to add app authentication to your web app running on Azure App Service
 
-**Note**: The default configuration deploys an OpenAI Model "gpt-4o" with version 2024-05-13. However, not all
+**Note**: The default configuration deploys an OpenAI Model "gpt-4.1" with version 2025-04-14. However, not all
 locations support this version. If you're deploying to a location that doesn't support version 2024-05-13, you'll need to
 switch to a lower version. To find out which versions are supported in different regions, visit the
-[GPT-4o Model Availability](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#global-standard-model-availability) page.
+[GPT-4.1 Model Availability](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#global-standard-model-availability) page.
 
 ### Testing the deployment
 1. Navigate to the admin site, where you can upload documents. It will be located at:
diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py
@@ -174,10 +174,10 @@ def __load_config(self, **kwargs) -> None:
         else:
             # Otherwise, fallback to individual environment variables
             self.AZURE_OPENAI_MODEL = os.getenv(
-                "AZURE_OPENAI_MODEL", "gpt-4o"
+                "AZURE_OPENAI_MODEL", "gpt-4.1"
             )
             self.AZURE_OPENAI_MODEL_NAME = os.getenv(
-                "AZURE_OPENAI_MODEL_NAME", "gpt-4o"
+                "AZURE_OPENAI_MODEL_NAME", "gpt-4.1"
             )
 
         self.AZURE_OPENAI_VISION_MODEL = os.getenv("AZURE_OPENAI_VISION_MODEL", "gpt-4")
diff --git a/code/tests/functional/tests/backend_api/default/test_conversation.py b/code/tests/functional/tests/backend_api/default/test_conversation.py
@@ -65,7 +65,7 @@ def completions_mocking(httpserver: HTTPServer, app_config: AppConfig):
             "id": "chatcmpl-6v7mkQj980V1yBec6ETrKPRqFjNw9",
             "object": "chat.completion",
             "created": 1679072642,
-            "model": "gpt-4o",
+            "model": "gpt-4.1",
             "usage": {
                 "prompt_tokens": 40,
                 "completion_tokens": 50,
diff --git a/code/tests/functional/tests/backend_api/default/test_post_prompt_tool.py b/code/tests/functional/tests/backend_api/default/test_post_prompt_tool.py
@@ -100,7 +100,7 @@ def completions_mocking(httpserver: HTTPServer, app_config: AppConfig):
             "id": "chatcmpl-6v7mkQj980V1yBec6ETrKPRqFjNw9",
             "object": "chat.completion",
             "created": 1679072642,
-            "model": "gpt-4o",
+            "model": "gpt-4.1",
             "usage": {
                 "prompt_tokens": 40,
                 "completion_tokens": 50,
@@ -132,7 +132,7 @@ def test_post_responds_successfully_when_not_filtered(
             "id": "chatcmpl-6v7mkQj980V1yBec6ETrKPRqFjNw9",
             "object": "chat.completion",
             "created": 1679072642,
-            "model": "gpt-4o",
+            "model": "gpt-4.1",
             "usage": {
                 "prompt_tokens": 40,
                 "completion_tokens": 50,
@@ -193,7 +193,7 @@ def test_post_responds_successfully_when_filtered(
             "id": "chatcmpl-6v7mkQj980V1yBec6ETrKPRqFjNw9",
             "object": "chat.completion",
             "created": 1679072642,
-            "model": "gpt-4o",
+            "model": "gpt-4.1",
             "usage": {
                 "prompt_tokens": 40,
                 "completion_tokens": 50,
@@ -254,7 +254,7 @@ def test_post_makes_correct_call_to_openai_from_post_prompt_tool(
             "id": "chatcmpl-6v7mkQj980V1yBec6ETrKPRqFjNw9",
             "object": "chat.completion",
             "created": 1679072642,
-            "model": "gpt-4o",
+            "model": "gpt-4.1",
             "usage": {
                 "prompt_tokens": 40,
                 "completion_tokens": 50,
diff --git a/code/tests/functional/tests/backend_api/integrated_vectorization_custom_conversation/test_iv_question_answer_tool.py b/code/tests/functional/tests/backend_api/integrated_vectorization_custom_conversation/test_iv_question_answer_tool.py
@@ -63,7 +63,7 @@ def completions_mocking(httpserver: HTTPServer, app_config: AppConfig):
             "id": "chatcmpl-6v7mkQj980V1yBec6ETrKPRqFjNw9",
             "object": "chat.completion",
             "created": 1679072642,
-            "model": "gpt-4o",
+            "model": "gpt-4.1",
             "usage": {
                 "prompt_tokens": 40,
                 "completion_tokens": 50,
diff --git a/docs/LOCAL_DEPLOYMENT.md b/docs/LOCAL_DEPLOYMENT.md
@@ -191,7 +191,7 @@ Execute the above [shell command](#L81) to run the function locally. You may nee
 |AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION ||Whether to use [Integrated Vectorization](https://learn.microsoft.com/en-us/azure/search/vector-search-integrated-vectorization)|
 |AZURE_OPENAI_RESOURCE||the name of your Azure OpenAI resource|
 |AZURE_OPENAI_MODEL||The name of your model deployment|
-|AZURE_OPENAI_MODEL_NAME|gpt-4o|The name of the model|
+|AZURE_OPENAI_MODEL_NAME|gpt-4.1|The name of the model|
 |AZURE_OPENAI_MODEL_VERSION|2024-05-13|The version of the model to use|
 |AZURE_OPENAI_API_KEY||One of the API keys of your Azure OpenAI resource|
 |AZURE_OPENAI_EMBEDDING_MODEL|text-embedding-ada-002|The name of your Azure OpenAI embeddings model deployment|
diff --git a/docs/TEAMS_LOCAL_DEPLOYMENT.md b/docs/TEAMS_LOCAL_DEPLOYMENT.md
@@ -65,7 +65,7 @@ Or use the [Azure Functions VS Code extension](https://marketplace.visualstudio.
 |AZURE_SEARCH_FILTER||Filter to apply to search queries.|
 |AZURE_OPENAI_RESOURCE||the name of your Azure OpenAI resource|
 |AZURE_OPENAI_MODEL||The name of your model deployment|
-|AZURE_OPENAI_MODEL_NAME|gpt-4o|The name of the model|
+|AZURE_OPENAI_MODEL_NAME|gpt-4.1|The name of the model|
 |AZURE_OPENAI_API_KEY||One of the API keys of your Azure OpenAI resource|
 |AZURE_OPENAI_EMBEDDING_MODEL|text-embedding-ada-002|The name of you Azure OpenAI embeddings model deployment|
 |AZURE_OPENAI_TEMPERATURE|0|What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. A value of 0 is recommended when using your data.|
diff --git a/docs/contract_assistance.md b/docs/contract_assistance.md
@@ -10,9 +10,9 @@ The following is the Chat With Your Data infrastructure configuration that we su
 - **Azure Semantic Search**: Utilize Azure Semantic Search to efficiently index and search legal documents. This provides powerful search capabilities and integration with other Azure services.
 - **Azure Cognitive Search Top K 15**: Set the Top K parameter to 15 to retrieve the top 15 most relevant documents. This configuration helps in providing precise and relevant search results for user queries.
 - **Azure Search Integrated Vectorization**: Enable integrated vectorization in Azure Search to improve the semantic understanding and relevance of search results. This enhances the Contract Assistant's ability to provide contextually accurate answers.
-- **Azure OpenAI Model gpt-4o**: Leverage the Azure OpenAI model gpt-4o for advanced natural language processing capabilities. This model is well-suited for handling complex legal queries and providing detailed and contextually appropriate responses.
+- **Azure OpenAI Model gpt-4.1**: Leverage the Azure OpenAI model gpt-4.1 for advanced natural language processing capabilities. This model is well-suited for handling complex legal queries and providing detailed and contextually appropriate responses.
 - **Orchestration Strategy: Semantic Kernel**: Implement the Semantic Kernel orchestration strategy to effectively manage the integration and interaction between different components of the infrastructure. This strategy ensures seamless operation and optimal performance of the Contract Assistant.
-- **Conversation Flow Options**: Setting `CONVERSATION_FLOW` enables running advanced AI models like GPT-4o on your own enterprise data without needing to train or fine-tune models.
+- **Conversation Flow Options**: Setting `CONVERSATION_FLOW` enables running advanced AI models like GPT-4.1 on your own enterprise data without needing to train or fine-tune models.
 
 By following these infrastructure configurations, you can enhance the efficiency, accuracy, and overall performance of the Chat With Your Data Contract Review and Summarization Assistant, ensuring it meets the high demands and expectations of  professionals.
 
@@ -22,8 +22,8 @@ To apply the suggested configurations in your deployment, update the following f
 - **Azure Semantic Search**: Set `AZURE_SEARCH_USE_SEMANTIC_SEARCH` to `true`
 - **Azure Cognitive Search Top K 15**: Set `AZURE_SEARCH_TOP_K` to `15`.
 - **Azure Search Integrated Vectorization**: Set `AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION` to `true`.
-- **Azure OpenAI Model**: Set `AZURE_OPENAI_MODEL`  to `gpt-4o`.
-- **Azure OpenAI Model Name**: Set `AZURE_OPENAI_MODEL_NAME` to `gpt-4o`. (could be different based on the name of the Azure OpenAI model deployment)
+- **Azure OpenAI Model**: Set `AZURE_OPENAI_MODEL`  to `gpt-4.1`.
+- **Azure OpenAI Model Name**: Set `AZURE_OPENAI_MODEL_NAME` to `gpt-4.1`. (could be different based on the name of the Azure OpenAI model deployment)
 - **Azure OpenAI Model Name Version**: Set `AZURE_OPENAI_MODEL_VERSION` to `2024-05-13`.
 - **Conversation Flow Options**: Set `CONVERSATION_FLOW` to `byod`
 - **Orchestration Strategy**: Set `ORCHESTRATION_STRATEGY` to `Semantic Kernel`.
diff --git a/docs/employee_assistance.md b/docs/employee_assistance.md
@@ -10,9 +10,9 @@ The following is the Chat With Your Data infrastructure configuration that we su
 - **Azure Semantic Search**: Utilize Azure Semantic Search to efficiently index and search employee handbooks and corporate policy documents. This provides powerful search capabilities and integration with other Azure services.
 - **Azure Cognitive Search Top K 15**: Set the Top K parameter to 15 to retrieve the top 15 most relevant documents. This configuration helps in providing precise and relevant search results for user queries.
 - **Azure Search Integrated Vectorization**: Enable integrated vectorization in Azure Search to improve the semantic understanding and relevance of search results. This enhances the Contract Assistant's ability to provide contextually accurate answers.
-- **Azure OpenAI Model gpt-4o**: Leverage the Azure OpenAI model gpt-4o for advanced natural language processing capabilities. This model is well-suited for handling complex legal queries and providing detailed and contextually appropriate responses.
+- **Azure OpenAI Model gpt-4.1**: Leverage the Azure OpenAI model gpt-4.1 for advanced natural language processing capabilities. This model is well-suited for handling complex legal queries and providing detailed and contextually appropriate responses.
 - **Orchestration Strategy: Semantic Kernel**: Implement the Semantic Kernel orchestration strategy to effectively manage the integration and interaction between different components of the infrastructure. This strategy ensures seamless operation and optimal performance of the Employee Assistant.
-- **Conversation Flow Options**: Setting `CONVERSATION_FLOW` enables running advanced AI models like GPT-4o on your own enterprise data without needing to train or fine-tune models.
+- **Conversation Flow Options**: Setting `CONVERSATION_FLOW` enables running advanced AI models like GPT-4.1 on your own enterprise data without needing to train or fine-tune models.
 
 By following these infrastructure configurations, you can enhance the efficiency, accuracy, and overall performance of the Chat With Your Data Employee Assistant, ensuring it meets the high demands and expectations of  professionals.
 
@@ -22,7 +22,7 @@ To apply the suggested configurations in your deployment, update the following f
 - **Azure Semantic Search**: Set `AZURE_SEARCH_USE_SEMANTIC_SEARCH` to `true`
 - **Azure Cognitive Search Top K 15**: Set `AZURE_SEARCH_TOP_K` to `15`.
 - **Azure Search Integrated Vectorization**: Set `AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION` to `true`.
-- **Azure OpenAI Model Info**: Set `AZURE_OPENAI_MODEL_INFO`  to `{"model":"gpt-4o","modelName":"gpt-4o","modelVersion":"2024-05-13"}`.(model could be different based on the name of the Azure OpenAI model deployment).
+- **Azure OpenAI Model Info**: Set `AZURE_OPENAI_MODEL_INFO`  to `{"model":"gpt-4.1","modelName":"gpt-4.1","modelVersion":"2025-04-14"}`.(model could be different based on the name of the Azure OpenAI model deployment).
 - **Conversation Flow Options**: Set `CONVERSATION_FLOW` to `byod`
 - **Orchestration Strategy**: Set `ORCHESTRATION_STRATEGY` to `Semantic Kernel`.
 
diff --git a/docs/model_configuration.md b/docs/model_configuration.md
@@ -15,11 +15,11 @@ This document outlines the necessary steps and configurations required for setti
 
 ### LLM
 - `AZURE_OPENAI_MODEL`: The Azure OpenAI Model Deployment Name
-    - example: `my-gpt-4o`
+    - example: `my-gpt-4.1`
 - `AZURE_OPENAI_MODEL_NAME`: The Azure OpenAI Model Name
-    - example: `gpt-4o`
+    - example: `gpt-4.1`
 - `AZURE_OPENAI_MODEL_VERSION`: The Azure OpenAI Model Version
-    - example: `2024-05-13`
+    - example: `2025-04-14`
 - `AZURE_OPENAI_MODEL_CAPACITY`: The Tokens per Minute Rate Limit (thousands)
     - example: `30`
 
@@ -62,12 +62,12 @@ This document outlines the necessary steps and configurations required for setti
 - To get the value of an environment variable, you can use the following command:
     - `azd env get <ENVIRONMENT_VARIABLE_NAME>`
 
-## GPT-4o & Text-Embeddings-3-Large
-- The following environment variables are set for the GPT-4o and Text-Embeddings-3-Large models:
+## GPT-4.1 & Text-Embeddings-3-Large
+- The following environment variables are set for the GPT-4.1 and Text-Embeddings-3-Large models:
     - `AZURE_OPENAI_API_VERSION`: `2024-05-01-preview`
-    - `AZURE_OPENAI_MODEL`: `my-gpt-4o`
-    - `AZURE_OPENAI_MODEL_NAME`: `gpt-4o`
-    - `AZURE_OPENAI_MODEL_VERSION`: `2024-05-13`
+    - `AZURE_OPENAI_MODEL`: `my-gpt-4.1`
+    - `AZURE_OPENAI_MODEL_NAME`: `gpt-4.1`
+    - `AZURE_OPENAI_MODEL_VERSION`: `2025-04-14`
     - `AZURE_OPENAI_EMBEDDING_MODEL`: `my-text-embedding-3-large`
     - `AZURE_OPENAI_EMBEDDING_MODEL_NAME`: `text-embedding-3-large`
     - `AZURE_OPENAI_EMBEDDING_MODEL_VERSION`: `1`
diff --git a/docs/transparency_faq.md b/docs/transparency_faq.md
@@ -2,7 +2,7 @@
 
 ### What is Chat with your data Solution Accelerator?
 
-This solution accelerator is an open-source GitHub Repository for the "Chat with your data" solution that combines the capabilities of Azure AI Search and GPT 3.5, 4, and 4o to create a conversational search experience. This solution accelerator uses Azure OpenAI GPT and embedding models, and an Azure AI Search index generated data by the customer, once installed/deployed, which is integrated into a web application to provide a natural language interface for search queries. The repository showcases a sample scenario of a contract analyst who wants to review and summarize relevant contracts, and another use case for a wealth advisor who is interested in reviewing market and fund documents.
+This solution accelerator is an open-source GitHub Repository for the "Chat with your data" solution that combines the capabilities of Azure AI Search and GPT 3.5, 4 and 4.1 to create a conversational search experience. This solution accelerator uses Azure OpenAI GPT and embedding models, and an Azure AI Search index generated data by the customer, once installed/deployed, which is integrated into a web application to provide a natural language interface for search queries. The repository showcases a sample scenario of a contract analyst who wants to review and summarize relevant contracts, and another use case for a wealth advisor who is interested in reviewing market and fund documents.
 
 ### What can Chat with your data Solution Accelerator do?
 
diff --git a/infra/main.bicep b/infra/main.bicep
@@ -126,13 +126,13 @@ param azureOpenAIResourceName string = 'oai-${resourceToken}'
 param azureOpenAISkuName string = 'S0'
 
 @description('Azure OpenAI Model Deployment Name')
-param azureOpenAIModel string = 'gpt-4o'
+param azureOpenAIModel string = 'gpt-4.1'
 
 @description('Azure OpenAI Model Name')
-param azureOpenAIModelName string = 'gpt-4o'
+param azureOpenAIModelName string = 'gpt-4.1'
 
 @description('Azure OpenAI Model Version')
-param azureOpenAIModelVersion string = '2024-05-13'
+param azureOpenAIModelVersion string = '2025-04-14'
 
 @description('Azure OpenAI Model Capacity - See here for more info  https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/quota')
 param azureOpenAIModelCapacity int = 30
@@ -404,7 +404,7 @@ var defaultOpenAiDeployments = [
       version: azureOpenAIModelVersion
     }
     sku: {
-      name: 'Standard'
+      name: 'GlobalStandard'
       capacity: azureOpenAIModelCapacity
     }
   }
diff --git a/infra/main.bicepparam b/infra/main.bicepparam
@@ -32,9 +32,9 @@ param azureSearchOffsetColumn = readEnvironmentVariable('AZURE_SEARCH_OFFSET_COL
 
 // OpenAI parameters
 param azureOpenAIApiVersion = readEnvironmentVariable('AZURE_OPENAI_API_VERSION', '2024-02-01')
-param azureOpenAIModel = readEnvironmentVariable('AZURE_OPENAI_MODEL', 'gpt-4o')
-param azureOpenAIModelName = readEnvironmentVariable('AZURE_OPENAI_MODEL_NAME', 'gpt-4o')
-param azureOpenAIModelVersion = readEnvironmentVariable('AZURE_OPENAI_MODEL_VERSION', '2024-05-13')
+param azureOpenAIModel = readEnvironmentVariable('AZURE_OPENAI_MODEL', 'gpt-4.1')
+param azureOpenAIModelName = readEnvironmentVariable('AZURE_OPENAI_MODEL_NAME', 'gpt-4.1')
+param azureOpenAIModelVersion = readEnvironmentVariable('AZURE_OPENAI_MODEL_VERSION', '2025-04-14')
 param azureOpenAIModelCapacity = int(readEnvironmentVariable('AZURE_OPENAI_MODEL_CAPACITY', '30'))
 param useAdvancedImageProcessing = bool(readEnvironmentVariable('USE_ADVANCED_IMAGE_PROCESSING', 'false'))
 param advancedImageProcessingMaxImages = int(readEnvironmentVariable('ADVANCED_IMAGE_PROCESSING_MAX_IMAGES', '1'))
diff --git a/infra/main.json b/infra/main.json
diff --git a/infra/prompt-flow/cwyd/flow.dag.template.yaml b/infra/prompt-flow/cwyd/flow.dag.template.yaml
diff --git a/scripts/checkquota.sh b/scripts/checkquota.sh
diff --git a/tests/e2e-test/README.md b/tests/e2e-test/README.md
diff --git a/tests/e2e-test/sample_dotenv_file.txt b/tests/e2e-test/sample_dotenv_file.txt
diff --git a/tests/e2e-test/tests/test_chat_with_your_data.py b/tests/e2e-test/tests/test_chat_with_your_data.py

Original file line number	Diff line number	Diff line change
`@@ -174,10 +174,10 @@ def __load_config(self, **kwargs) -> None:`
`174`	`174`	`else:`
`175`	`175`	`# Otherwise, fallback to individual environment variables`
`176`	`176`	`self.AZURE_OPENAI_MODEL = os.getenv(`
`177`		`- "AZURE_OPENAI_MODEL", "gpt-4o"`
	`177`	`+ "AZURE_OPENAI_MODEL", "gpt-4.1"`
`178`	`178`	`)`
`179`	`179`	`self.AZURE_OPENAI_MODEL_NAME = os.getenv(`
`180`		`- "AZURE_OPENAI_MODEL_NAME", "gpt-4o"`
	`180`	`+ "AZURE_OPENAI_MODEL_NAME", "gpt-4.1"`
`181`	`181`	`)`
`182`	`182`
`183`	`183`	`self.AZURE_OPENAI_VISION_MODEL = os.getenv("AZURE_OPENAI_VISION_MODEL", "gpt-4")`