
Commit 43e0779

Merge pull request #2516 from santiagxf/santiagxf/quality-review

Quality general review

2 parents 4025a6c + 6dc7c33

13 files changed: +44 −39 lines changed

articles/ai-foundry/model-inference/how-to/inference.md

Lines changed: 2 additions & 2 deletions

@@ -26,9 +26,9 @@ Azure AI services expose multiple endpoints depending on the type of work you're
 > * Azure AI model inference endpoint
 > * Azure OpenAI endpoint
 
-The **Azure AI inference endpoint** allows customers to use a single endpoint with the same authentication and schema to generate inference for the deployed models in the resource. All the models support this capability. This endpoint follows the [Azure AI model inference API](../../../ai-studio/reference/reference-model-inference-api.md).
+The **Azure AI inference endpoint** (usually with the form `https://<resource-name>.services.ai.azure.com/models`) allows customers to use a single endpoint with the same authentication and schema to generate inference for the deployed models in the resource. All the models support this capability. This endpoint follows the [Azure AI model inference API](../../../ai-studio/reference/reference-model-inference-api.md).
 
-**Azure OpenAI** models deployed to AI services also support the Azure OpenAI API. This endpoint exposes the full capabilities of OpenAI models and supports more features like assistants, threads, files, and batch inference.
+**Azure OpenAI** models deployed to AI services also support the Azure OpenAI API (usually with the form `https://<resource-name>.openai.azure.com`). This endpoint exposes the full capabilities of OpenAI models and supports more features like assistants, threads, files, and batch inference.
 
 To learn more about how to apply the **Azure OpenAI endpoint** see [Azure OpenAI service documentation](../../../ai-services/openai/overview.md).
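The two endpoint forms named in this change differ only in host suffix and path. A minimal sketch, assuming a hypothetical resource named `contoso-ai` on the default domains, of how each URL is derived:

```python
# Hypothetical resource name, used only for illustration.
resource_name = "contoso-ai"

# Azure AI model inference endpoint: one URL and schema for all deployed models.
inference_endpoint = f"https://{resource_name}.services.ai.azure.com/models"

# Azure OpenAI endpoint: full Azure OpenAI API surface for OpenAI models.
openai_endpoint = f"https://{resource_name}.openai.azure.com"
```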

articles/ai-foundry/model-inference/includes/code-create-chat-client-entra.md

Lines changed: 5 additions & 5 deletions

@@ -26,7 +26,7 @@ from azure.ai.inference import ChatCompletionsClient
 from azure.identity import AzureDefaultCredential
 
 model = ChatCompletionsClient(
-    endpoint=os.environ["AZUREAI_ENDPOINT_URL"],
+    endpoint="https://<resource>.services.ai.azure.com/models",
     credential=AzureDefaultCredential(),
     model="mistral-large-2407",
 )

@@ -48,7 +48,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureDefaultCredential } from "@azure/identity";
 
 const client = new ModelClient(
-    process.env.AZUREAI_ENDPOINT_URL,
+    "https://<resource>.services.ai.azure.com/models",
     new AzureDefaultCredential(),
     "mistral-large-2407"
 );

@@ -80,7 +80,7 @@ Then, you can use the package to consume the model. The following example shows
 
 ```csharp
 ChatCompletionsClient client = new ChatCompletionsClient(
-    new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
+    new Uri("https://<resource>.services.ai.azure.com/models"),
     new AzureDefaultCredential(includeInteractiveCredentials: true),
     "mistral-large-2407"
 );

@@ -108,7 +108,7 @@ Then, you can use the package to consume the model. The following example shows
 ```java
 ChatCompletionsClient client = new ChatCompletionsClientBuilder()
     .credential(new DefaultAzureCredential()))
-    .endpoint("{endpoint}")
+    .endpoint("https://<resource>.services.ai.azure.com/models")
     .model("mistral-large-2407")
     .buildClient();
 ```

@@ -122,7 +122,7 @@ Use the reference section to explore the API design and which parameters are ava
 __Request__
 
 ```HTTP/1.1
-POST models/chat/completions?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```
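The REST request updated above has three moving parts: the resource-specific base URL, the `api-version` query parameter, and the bearer token. A stdlib sketch of assembling them (the `build_chat_request` helper and the `contoso-ai` resource are hypothetical, not part of these docs):

```python
import json


def build_chat_request(resource, token, model, messages):
    """Assemble URL, headers, and JSON body for a chat completions call.

    Sketch only: pass the result to any HTTP client
    (urllib.request, requests, ...).
    """
    url = (f"https://{resource}.services.ai.azure.com/models"
           "/chat/completions?api-version=2024-05-01-preview")
    headers = {"Authorization": f"Bearer {token}",
               "Content-Type": "application/json"}
    body = json.dumps({"model": model, "messages": messages})
    return url, headers, body


url, headers, body = build_chat_request(
    "contoso-ai", "<bearer-token>", "mistral-large-2407",
    [{"role": "user", "content": "How many languages are in the world?"}],
)
```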

articles/ai-foundry/model-inference/includes/code-create-chat-client.md

Lines changed: 4 additions & 4 deletions

@@ -26,7 +26,7 @@ from azure.ai.inference import ChatCompletionsClient
 from azure.core.credentials import AzureKeyCredential
 
 model = ChatCompletionsClient(
-    endpoint=os.environ["AZUREAI_ENDPOINT_URL"],
+    endpoint="https://<resource>.services.ai.azure.com/models",
     credential=AzureKeyCredential(os.environ["AZUREAI_ENDPOINT_KEY"]),
 )
 ```

@@ -49,7 +49,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureKeyCredential } from "@azure/core-auth";
 
 const client = new ModelClient(
-    process.env.AZUREAI_ENDPOINT_URL,
+    "https://<resource>.services.ai.azure.com/models",
     new AzureKeyCredential(process.env.AZUREAI_ENDPOINT_KEY)
 );
 ```

@@ -76,7 +76,7 @@ Then, you can use the package to consume the model. The following example shows
 
 ```csharp
 ChatCompletionsClient client = new ChatCompletionsClient(
-    new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
+    new Uri("https://<resource>.services.ai.azure.com/models"),
     new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_INFERENCE_CREDENTIAL"))
 );
 ```

@@ -114,7 +114,7 @@ Use the reference section to explore the API design and which parameters are ava
 __Request__
 
 ```HTTP/1.1
-POST models/chat/completions?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```

articles/ai-foundry/model-inference/includes/code-create-chat-completion.md

Lines changed: 1 addition & 1 deletion

@@ -77,7 +77,7 @@ for (ChatChoice choice : chatCompletions.getChoices()) {
 __Request__
 
 ```HTTP/1.1
-POST models/chat/completions?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```
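Whichever endpoint form the request uses, the response shape is the same across languages. A sketch of reading the reply and usage out of the JSON (the sample response is fabricated for illustration, trimmed to the fields these docs show):

```python
# Fabricated, trimmed chat completions response in the API's documented shape.
sample_response = {
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",
            "message": {"role": "assistant",
                        "content": "There are about 7,000 languages."},
        }
    ],
    "usage": {"prompt_tokens": 19, "completion_tokens": 91, "total_tokens": 110},
}

# The reply text lives under choices[0].message.content.
reply = sample_response["choices"][0]["message"]["content"]
total_tokens = sample_response["usage"]["total_tokens"]
```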

articles/ai-foundry/model-inference/includes/code-create-embeddings-client.md

Lines changed: 7 additions & 7 deletions

@@ -26,7 +26,7 @@ from azure.ai.inference import EmbeddingsClient
 from azure.core.credentials import AzureKeyCredential
 
 client = EmbeddingsClient(
-    endpoint=os.environ["AZUREAI_ENDPOINT_URL"],
+    endpoint="https://<resource>.services.ai.azure.com/models",
     credential=AzureKeyCredential(os.environ["AZUREAI_ENDPOINT_KEY"]),
 )
 ```

@@ -39,7 +39,7 @@ from azure.ai.inference import EmbeddingsClient
 from azure.identity import AzureDefaultCredential
 
 client = EmbeddingsClient(
-    endpoint=os.environ["AZUREAI_ENDPOINT_URL"],
+    endpoint="https://<resource>.services.ai.azure.com/models",
     credential=AzureDefaultCredential(),
 )
 ```

@@ -62,7 +62,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureKeyCredential } from "@azure/core-auth";
 
 const client = new ModelClient(
-    process.env.AZUREAI_ENDPOINT_URL,
+    "https://<resource>.services.ai.azure.com/models",
     new AzureKeyCredential(process.env.AZUREAI_ENDPOINT_KEY)
 );
 ```

@@ -75,7 +75,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureDefaultCredential } from "@azure/identity";
 
 const client = new ModelClient(
-    process.env.AZUREAI_ENDPOINT_URL,
+    "https://<resource>.services.ai.azure.com/models",
     new AzureDefaultCredential()
 );
 ```

@@ -108,7 +108,7 @@ Then, you can use the package to consume the model. The following example shows
 
 ```csharp
 EmbeddingsClient client = new EmbeddingsClient(
-    new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
+    new Uri("https://<resource>.services.ai.azure.com/models"),
     new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_INFERENCE_CREDENTIAL"))
 );
 ```

@@ -117,7 +117,7 @@ For endpoint with support for Microsoft Entra ID (formerly Azure Active Director
 
 ```csharp
 EmbeddingsClient client = new EmbeddingsClient(
-    new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
+    new Uri("https://<resource>.services.ai.azure.com/models"),
     new DefaultAzureCredential(includeInteractiveCredentials: true)
 );
 ```

@@ -131,7 +131,7 @@ Use the reference section to explore the API design and which parameters are ava
 __Request__
 
 ```HTTP/1.1
-POST models/embeddings?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/embeddings?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```
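The embeddings route updated above uses the same base URL with `/embeddings` in place of `/chat/completions`, and the payload is just an `input` list. A stdlib sketch (the helper name and the `contoso-ai` resource are hypothetical):

```python
import json


def build_embeddings_request(resource, key, texts):
    """Assemble URL, headers, and JSON body for an embeddings call (sketch).

    Uses key auth via the api-key header; a bearer token works the same way
    through the Authorization header instead.
    """
    url = (f"https://{resource}.services.ai.azure.com/models"
           "/embeddings?api-version=2024-05-01-preview")
    headers = {"api-key": key, "Content-Type": "application/json"}
    body = json.dumps({"input": texts})
    return url, headers, body


url, headers, body = build_embeddings_request(
    "contoso-ai", "<key>", ["The ultimate answer to the question of life"],
)
```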

articles/ai-foundry/model-inference/includes/code-create-embeddings.md

Lines changed: 1 addition & 1 deletion

@@ -53,7 +53,7 @@ Console.WriteLine($"Response: {response.Data.Embeddings}");
 __Request__
 
 ```HTTP/1.1
-POST models/embeddings?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/embeddings?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```

articles/ai-foundry/model-inference/includes/code-manage-content-filtering.md

Lines changed: 1 addition & 1 deletion

@@ -122,7 +122,7 @@ try {
 __Request__
 
 ```HTTP/1.1
-POST /chat/completions?api-version=2024-04-01-preview
+POST /chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```
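When the content filter trips, the request above comes back as an HTTP 400 whose JSON body names the reason. A sketch of detecting that case (the sample body is fabricated, and the `content_filter` code follows the general Azure error shape as an assumption, not something this file states):

```python
import json

# Fabricated 400 response body in the assumed Azure error shape.
error_body = json.dumps({
    "error": {
        "code": "content_filter",
        "message": "The response was filtered due to the prompt triggering "
                   "the content management policy.",
    }
})

# Treat the request as filtered when the error code matches.
payload = json.loads(error_body)
is_filtered = payload.get("error", {}).get("code") == "content_filter"
```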

articles/ai-foundry/model-inference/includes/how-to-prerequisites.md

Lines changed: 1 addition & 1 deletion

@@ -9,7 +9,7 @@ author: santiagxf
 
 * An Azure subscription. If you're using [GitHub Models](https://docs.github.com/en/github-models/), you can upgrade your experience and create an Azure subscription in the process. Read [Upgrade from GitHub Models to Azure AI model inference](../how-to/quickstart-github-models.md) if that's your case.
 
-* An Azure AI services resource. For more information, see [Create an Azure AI Services resource](../../../ai-services/multi-service-resource.md?context=/azure/ai-services/model-inference/context/context).
+* An Azure AI services resource. For more information, see [Create an Azure AI Services resource](../how-to/quickstart-create-resources.md).
 
 * The endpoint URL and key.
articles/ai-foundry/model-inference/includes/use-chat-completions/rest.md

Lines changed: 6 additions & 7 deletions

@@ -28,18 +28,18 @@ To use chat completion models in your application, you need:
 
 ## Use chat completions
 
-To use the text embeddings, use the route `/chat/completions` along with your credential indicated in `api-key`. `Authorization` header is also supported with the format `Bearer <key>`.
+To use the text embeddings, use the route `/chat/completions` appended to the base URL along with your credential indicated in `api-key`. `Authorization` header is also supported with the format `Bearer <key>`.
 
 ```http
-POST /chat/completions
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Content-Type: application/json
 api-key: <key>
 ```
 
 If you have configured the resource with **Microsoft Entra ID** support, pass you token in the `Authorization` header:
 
 ```http
-POST /chat/completions
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Content-Type: application/json
 Authorization: Bearer <token>
 ```

@@ -287,8 +287,7 @@ Some models can create JSON outputs. Set `response_format` to `json_object` to e
 The Azure AI Model Inference API allows you to pass extra parameters to the model. The following code example shows how to pass the extra parameter `logprobs` to the model.
 
 ```http
-POST /chat/completions HTTP/1.1
-Host: <ENDPOINT_URI>
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <TOKEN>
 Content-Type: application/json
 extra-parameters: pass-through

@@ -565,7 +564,7 @@ Now, create a chat completion request with the image:
 
 ```json
 {
-    "model": "mistral-large-2407",
+    "model": "phi-3.5-vision-instruct",
     "messages": [
         {
             "role": "user",

@@ -597,7 +596,7 @@ The response is as follows, where you can see the model's usage statistics:
     "id": "0a1234b5de6789f01gh2i345j6789klm",
     "object": "chat.completion",
     "created": 1718726686,
-    "model": "mistral-large-2407",
+    "model": "phi-3.5-vision-instruct",
     "choices": [
         {
             "index": 0,
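The model swap above (`mistral-large-2407` to `phi-3.5-vision-instruct`) pairs with a multipart `content` list that carries the image as a data URL. A sketch of assembling that request body (the image bytes are placeholders; in practice read and encode a real file):

```python
import base64

# Placeholder bytes standing in for a real PNG file.
image_bytes = b"\x89PNG placeholder"
data_url = "data:image/png;base64," + base64.b64encode(image_bytes).decode("ascii")

# Request body mixing a text part and an image_url part in one user message.
request = {
    "model": "phi-3.5-vision-instruct",
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text",
                 "text": "Which conclusion can you extract from the image?"},
                {"type": "image_url", "image_url": {"url": data_url}},
            ],
        }
    ],
}
```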

articles/ai-foundry/model-inference/includes/use-embeddings/rest.md

Lines changed: 3 additions & 3 deletions

@@ -28,18 +28,18 @@ To use embedding models in your application, you need:
 
 ## Use embeddings
 
-To use the text embeddings, use the route `/embeddings` along with your credential indicated in `api-key`. `Authorization` header is also supported with the format `Bearer <key>`.
+To use the text embeddings, use the route `/embeddings` appended to the base URL along with your credential indicated in `api-key`. `Authorization` header is also supported with the format `Bearer <key>`.
 
 ```http
-POST /embeddings
+POST https://<resource>.services.ai.azure.com/models/embeddings?api-version=2024-05-01-preview
 Content-Type: application/json
 api-key: <key>
 ```
 
 If you have configured the resource with **Microsoft Entra ID** support, pass you token in the `Authorization` header:
 
 ```http
-POST /embeddings
+POST https://<resource>.services.ai.azure.com/models/embeddings?api-version=2024-05-01-preview
 Content-Type: application/json
 Authorization: Bearer <token>
 ```
