
Commit 46067ad

fix: review
1 parent 4025a6c commit 46067ad

File tree: 13 files changed (+43, -38 lines)

articles/ai-foundry/model-inference/how-to/inference.md
Lines changed: 1 addition & 1 deletion

@@ -34,7 +34,7 @@ To learn more about how to apply the **Azure OpenAI endpoint** see [Azure OpenAI
 
 ## Using the routing capability in the Azure AI model inference endpoint
 
-The inference endpoint routes requests to a given deployment by matching the parameter `name` inside of the request to the name of the deployment. This means that *deployments work as an alias of a given model under certain configurations*. This flexibility allows you to deploy a given model multiple times in the service but under different configurations if needed.
+The inference endpoint routes requests to a given deployment by matching the parameter `name` inside of the request to the name of the deployment. This means that *deployments work as an alias of a given model under certain configurations*. This flexibility allows you to deploy a given model multiple times in the service but under different configurations if needed. The inference endpoint usually has the form `https://<resource-name>.services.ai.azure.com/models`.
 
 :::image type="content" source="../media/endpoint/endpoint-routing.png" alt-text="An illustration showing how routing works for a Meta-llama-3.2-8b-instruct model by indicating such name in the parameter 'model' inside of the payload request." lightbox="../media/endpoint/endpoint-routing.png":::
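The routing behavior this hunk documents can be sketched locally. The helper below is illustrative only: the resource name `my-resource` and the deployment name are hypothetical placeholders, not values from the commit, and the function simply builds the URL and payload that the service would match the `model` field against.

```python
import json

def build_chat_request(resource: str, deployment: str, prompt: str) -> tuple[str, str]:
    """Build the request URL and JSON body; the service routes the call
    to the deployment whose name matches the "model" field."""
    url = (f"https://{resource}.services.ai.azure.com/models"
           f"/chat/completions?api-version=2024-05-01-preview")
    body = json.dumps({
        "model": deployment,  # deployment name acts as the model alias
        "messages": [{"role": "user", "content": prompt}],
    })
    return url, body

url, body = build_chat_request("my-resource", "Meta-Llama-3.2-8B-Instruct", "Hello")
print(url)
```

Because deployments act as aliases, the same underlying model deployed twice under different names would be addressed by two different `model` values against the same URL.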

articles/ai-foundry/model-inference/includes/code-create-chat-client-entra.md
Lines changed: 5 additions & 5 deletions

@@ -26,7 +26,7 @@ from azure.ai.inference import ChatCompletionsClient
 from azure.identity import AzureDefaultCredential
 
 model = ChatCompletionsClient(
-    endpoint=os.environ["AZUREAI_ENDPOINT_URL"],
+    endpoint="https://<resource>.services.ai.azure.com/models",
     credential=AzureDefaultCredential(),
     model="mistral-large-2407",
 )

@@ -48,7 +48,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureDefaultCredential } from "@azure/identity";
 
 const client = new ModelClient(
-    process.env.AZUREAI_ENDPOINT_URL,
+    "https://<resource>.services.ai.azure.com/models",
     new AzureDefaultCredential(),
     "mistral-large-2407"
 );

@@ -80,7 +80,7 @@ Then, you can use the package to consume the model. The following example shows
 
 ```csharp
 ChatCompletionsClient client = new ChatCompletionsClient(
-    new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
+    new Uri("https://<resource>.services.ai.azure.com/models"),
     new AzureDefaultCredential(includeInteractiveCredentials: true),
     "mistral-large-2407"
 );

@@ -108,7 +108,7 @@ Then, you can use the package to consume the model. The following example shows
 ```java
 ChatCompletionsClient client = new ChatCompletionsClientBuilder()
     .credential(new DefaultAzureCredential()))
-    .endpoint("{endpoint}")
+    .endpoint("https://<resource>.services.ai.azure.com/models")
     .model("mistral-large-2407")
     .buildClient();
 ```

@@ -122,7 +122,7 @@ Use the reference section to explore the API design and which parameters are ava
 __Request__
 
 ```HTTP/1.1
-POST models/chat/completions?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```

articles/ai-foundry/model-inference/includes/code-create-chat-client.md
Lines changed: 4 additions & 4 deletions

@@ -26,7 +26,7 @@ from azure.ai.inference import ChatCompletionsClient
 from azure.core.credentials import AzureKeyCredential
 
 model = ChatCompletionsClient(
-    endpoint=os.environ["AZUREAI_ENDPOINT_URL"],
+    endpoint="https://<resource>.services.ai.azure.com/models",
     credential=AzureKeyCredential(os.environ["AZUREAI_ENDPOINT_KEY"]),
 )
 ```

@@ -49,7 +49,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureKeyCredential } from "@azure/core-auth";
 
 const client = new ModelClient(
-    process.env.AZUREAI_ENDPOINT_URL,
+    "https://<resource>.services.ai.azure.com/models",
     new AzureKeyCredential(process.env.AZUREAI_ENDPOINT_KEY)
 );
 ```

@@ -76,7 +76,7 @@ Then, you can use the package to consume the model. The following example shows
 
 ```csharp
 ChatCompletionsClient client = new ChatCompletionsClient(
-    new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
+    new Uri("https://<resource>.services.ai.azure.com/models"),
    new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_INFERENCE_CREDENTIAL"))
 );
 ```

@@ -114,7 +114,7 @@ Use the reference section to explore the API design and which parameters are ava
 __Request__
 
 ```HTTP/1.1
-POST models/chat/completions?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```
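The two credential styles shown across these client files (key-based here, Microsoft Entra ID in the previous file) differ only in which auth header accompanies the request. A minimal sketch, with a hypothetical helper name not taken from the commit, of how the two header sets would be assembled:

```python
def auth_headers(key=None, token=None):
    """Return request headers for either key-based or Microsoft Entra ID
    authentication. Exactly one of `key` or `token` must be given."""
    headers = {"Content-Type": "application/json"}
    if key is not None:
        headers["api-key"] = key  # key-based authentication
    elif token is not None:
        headers["Authorization"] = f"Bearer {token}"  # Entra ID access token
    else:
        raise ValueError("provide either a key or a token")
    return headers

print(auth_headers(key="abc123"))
```

The SDKs above hide this distinction behind `AzureKeyCredential` versus `DefaultAzureCredential`; the sketch only makes the underlying wire difference visible.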

articles/ai-foundry/model-inference/includes/code-create-chat-completion.md
Lines changed: 1 addition & 1 deletion

@@ -77,7 +77,7 @@ for (ChatChoice choice : chatCompletions.getChoices()) {
 __Request__
 
 ```HTTP/1.1
-POST models/chat/completions?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```

articles/ai-foundry/model-inference/includes/code-create-embeddings-client.md
Lines changed: 7 additions & 7 deletions

@@ -26,7 +26,7 @@ from azure.ai.inference import EmbeddingsClient
 from azure.core.credentials import AzureKeyCredential
 
 client = EmbeddingsClient(
-    endpoint=os.environ["AZUREAI_ENDPOINT_URL"],
+    endpoint="https://<resource>.services.ai.azure.com/models",
     credential=AzureKeyCredential(os.environ["AZUREAI_ENDPOINT_KEY"]),
 )
 ```

@@ -39,7 +39,7 @@ from azure.ai.inference import EmbeddingsClient
 from azure.identity import AzureDefaultCredential
 
 client = EmbeddingsClient(
-    endpoint=os.environ["AZUREAI_ENDPOINT_URL"],
+    endpoint="https://<resource>.services.ai.azure.com/models",
     credential=AzureDefaultCredential(),
 )
 ```

@@ -62,7 +62,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureKeyCredential } from "@azure/core-auth";
 
 const client = new ModelClient(
-    process.env.AZUREAI_ENDPOINT_URL,
+    "https://<resource>.services.ai.azure.com/models",
     new AzureKeyCredential(process.env.AZUREAI_ENDPOINT_KEY)
 );
 ```

@@ -75,7 +75,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureDefaultCredential } from "@azure/identity";
 
 const client = new ModelClient(
-    process.env.AZUREAI_ENDPOINT_URL,
+    "https://<resource>.services.ai.azure.com/models",
     new AzureDefaultCredential()
 );
 ```

@@ -108,7 +108,7 @@ Then, you can use the package to consume the model. The following example shows
 
 ```csharp
 EmbeddingsClient client = new EmbeddingsClient(
-    new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
+    new Uri("https://<resource>.services.ai.azure.com/models"),
     new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_INFERENCE_CREDENTIAL"))
 );
 ```

@@ -117,7 +117,7 @@ For endpoint with support for Microsoft Entra ID (formerly Azure Active Director
 
 ```csharp
 EmbeddingsClient client = new EmbeddingsClient(
-    new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
+    new Uri("https://<resource>.services.ai.azure.com/models"),
     new DefaultAzureCredential(includeInteractiveCredentials: true)
 );
 ```

@@ -131,7 +131,7 @@ Use the reference section to explore the API design and which parameters are ava
 __Request__
 
 ```HTTP/1.1
-POST models/embeddings?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/embeddings?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```

articles/ai-foundry/model-inference/includes/code-create-embeddings.md
Lines changed: 1 addition & 1 deletion

@@ -53,7 +53,7 @@ Console.WriteLine($"Response: {response.Data.Embeddings}");
 __Request__
 
 ```HTTP/1.1
-POST models/embeddings?api-version=2024-04-01-preview
+POST https://<resource>.services.ai.azure.com/models/embeddings?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```

articles/ai-foundry/model-inference/includes/code-manage-content-filtering.md
Lines changed: 1 addition & 1 deletion

@@ -122,7 +122,7 @@ try {
 __Request__
 
 ```HTTP/1.1
-POST /chat/completions?api-version=2024-04-01-preview
+POST /chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <bearer-token>
 Content-Type: application/json
 ```
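When the content filter rejects a request on this route, the service answers with an HTTP 400 and an error payload rather than a completion. The payload shape used below (an `error` object with a `content_filter` code) is an assumption for illustration, not something shown in this commit:

```python
import json

def is_content_filtered(status: int, body: str) -> bool:
    """Heuristic check for a content-filter rejection.
    Assumes the error payload carries {"error": {"code": "content_filter"}}."""
    if status != 400:
        return False
    try:
        error = json.loads(body).get("error", {})
    except ValueError:
        return False  # body was not JSON at all
    return error.get("code") == "content_filter"
```

A client would branch on this check to surface a moderation message instead of retrying the request verbatim.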

articles/ai-foundry/model-inference/includes/how-to-prerequisites.md
Lines changed: 1 addition & 1 deletion

@@ -9,7 +9,7 @@ author: santiagxf
 
 * An Azure subscription. If you're using [GitHub Models](https://docs.github.com/en/github-models/), you can upgrade your experience and create an Azure subscription in the process. Read [Upgrade from GitHub Models to Azure AI model inference](../how-to/quickstart-github-models.md) if that's your case.
 
-* An Azure AI services resource. For more information, see [Create an Azure AI Services resource](../../../ai-services/multi-service-resource.md?context=/azure/ai-services/model-inference/context/context).
+* An Azure AI services resource. For more information, see [Create an Azure AI Services resource](../how-to/quickstart-create-resources.md).
 
 * The endpoint URL and key.
articles/ai-foundry/model-inference/includes/use-chat-completions/rest.md
Lines changed: 6 additions & 7 deletions

@@ -28,18 +28,18 @@ To use chat completion models in your application, you need:
 
 ## Use chat completions
 
-To use the text embeddings, use the route `/chat/completions` along with your credential indicated in `api-key`. `Authorization` header is also supported with the format `Bearer <key>`.
+To use chat completions, use the route `/chat/completions` appended to the base URL, along with your credential indicated in `api-key`. The `Authorization` header is also supported with the format `Bearer <key>`.
 
 ```http
-POST /chat/completions
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Content-Type: application/json
 api-key: <key>
 ```
 
 If you have configured the resource with **Microsoft Entra ID** support, pass your token in the `Authorization` header:
 
 ```http
-POST /chat/completions
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Content-Type: application/json
 Authorization: Bearer <token>
 ```

@@ -287,8 +287,7 @@ Some models can create JSON outputs. Set `response_format` to `json_object` to e
 The Azure AI Model Inference API allows you to pass extra parameters to the model. The following code example shows how to pass the extra parameter `logprobs` to the model.
 
 ```http
-POST /chat/completions HTTP/1.1
-Host: <ENDPOINT_URI>
+POST https://<resource>.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview
 Authorization: Bearer <TOKEN>
 Content-Type: application/json
 extra-parameters: pass-through

@@ -565,7 +564,7 @@ Now, create a chat completion request with the image:
 
 ```json
 {
-    "model": "mistral-large-2407",
+    "model": "phi-3.5-vision-instruct",
     "messages": [
         {
             "role": "user",

@@ -597,7 +596,7 @@ The response is as follows, where you can see the model's usage statistics:
     "id": "0a1234b5de6789f01gh2i345j6789klm",
     "object": "chat.completion",
     "created": 1718726686,
-    "model": "mistral-large-2407",
+    "model": "phi-3.5-vision-instruct",
     "choices": [
         {
             "index": 0,

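The `extra-parameters: pass-through` header shown in this file tells the service to forward payload fields it doesn't recognize (such as `logprobs`) on to the model instead of rejecting them. A local sketch of assembling such a request; the helper name and sample values are illustrative, not from the commit:

```python
import json

def request_with_extras(base_payload: dict, extras: dict) -> tuple[dict, str]:
    """Merge model-specific extra parameters into the payload and set the
    header asking the service to pass unknown fields through to the model."""
    headers = {
        "Content-Type": "application/json",
        "extra-parameters": "pass-through",  # forward unknown fields as-is
    }
    return headers, json.dumps({**base_payload, **extras})

headers, body = request_with_extras(
    {"model": "mistral-large-2407", "messages": []},
    {"logprobs": True},
)
print(headers["extra-parameters"])
```

Without the header, an unknown field in the payload would be handled by the service's default behavior rather than reaching the model.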
articles/ai-foundry/model-inference/includes/use-embeddings/rest.md
Lines changed: 3 additions & 3 deletions

@@ -28,18 +28,18 @@ To use embedding models in your application, you need:
 
 ## Use embeddings
 
-To use the text embeddings, use the route `/embeddings` along with your credential indicated in `api-key`. `Authorization` header is also supported with the format `Bearer <key>`.
+To use the text embeddings, use the route `/embeddings` appended to the base URL, along with your credential indicated in `api-key`. The `Authorization` header is also supported with the format `Bearer <key>`.
 
 ```http
-POST /embeddings
+POST https://<resource>.services.ai.azure.com/models/embeddings?api-version=2024-05-01-preview
 Content-Type: application/json
 api-key: <key>
 ```
 
 If you have configured the resource with **Microsoft Entra ID** support, pass your token in the `Authorization` header:
 
 ```http
-POST /embeddings
+POST https://<resource>.services.ai.azure.com/models/embeddings?api-version=2024-05-01-preview
 Content-Type: application/json
 Authorization: Bearer <token>
 ```
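The embeddings route above returns the vectors in a `data` array, each entry carrying an `index` and an `embedding`. That response shape is assumed from the standard embeddings API, not shown in this commit; a sketch of extracting the vectors in request order:

```python
import json

def extract_embeddings(response_body: str) -> list:
    """Pull the embedding vectors out of an embeddings response,
    ordered by each item's "index" field."""
    data = json.loads(response_body)["data"]
    return [item["embedding"] for item in sorted(data, key=lambda d: d["index"])]

# Hypothetical response with items arriving out of order.
sample = json.dumps({"data": [
    {"index": 1, "embedding": [0.3, 0.4]},
    {"index": 0, "embedding": [0.1, 0.2]},
]})
print(extract_embeddings(sample))  # → [[0.1, 0.2], [0.3, 0.4]]
```

Sorting by `index` matters because the order of items in the array is not guaranteed to match the order of the inputs.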
