MicrosoftDocs
diff --git a/‎articles/ai-services/document-intelligence/quickstarts/includes/csharp-sdk.md
Lines changed: 12 additions & 2 deletions b/‎articles/ai-services/document-intelligence/quickstarts/includes/csharp-sdk.md
Lines changed: 12 additions & 2 deletions
diff --git a/‎articles/ai-services/document-intelligence/quickstarts/includes/java-sdk.md
Lines changed: 24 additions & 15 deletions b/‎articles/ai-services/document-intelligence/quickstarts/includes/java-sdk.md
Lines changed: 24 additions & 15 deletions
diff --git a/‎articles/ai-services/document-intelligence/quickstarts/includes/javascript-sdk.md
Lines changed: 60 additions & 52 deletions b/‎articles/ai-services/document-intelligence/quickstarts/includes/javascript-sdk.md
Lines changed: 60 additions & 52 deletions
diff --git a/‎articles/ai-services/document-intelligence/quickstarts/includes/python-sdk.md
Lines changed: 8 additions & 8 deletions b/‎articles/ai-services/document-intelligence/quickstarts/includes/python-sdk.md
Lines changed: 8 additions & 8 deletions
diff --git a/‎articles/ai-services/openai/concepts/provisioned-throughput.md
Lines changed: 15 additions & 7 deletions b/‎articles/ai-services/openai/concepts/provisioned-throughput.md
Lines changed: 15 additions & 7 deletions
@@ -209,7 +209,12 @@ DocumentIntelligenceClient client = new DocumentIntelligenceClient(new Uri(endpo
 //sample document
 Uri fileUri = new Uri ("https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf");
 
-Operation<AnalyzeResult> operation = await client.AnalyzeDocumentAsync(WaitUntil.Completed, "prebuilt-layout", fileUri);
+AnalyzeDocumentContent content = new AnalyzeDocumentContent()
+{
+    UrlSource= fileUri
+};
+
+Operation<AnalyzeResult> operation = await client.AnalyzeDocumentAsync(WaitUntil.Completed, "prebuilt-layout", content);
 
 AnalyzeResult result = operation.Value;
 
@@ -641,7 +646,12 @@ DocumentIntelligenceClient client = new DocumentIntelligenceClient(new Uri(endpo
 
 Uri invoiceUri = new Uri ("https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-invoice.pdf");
 
-Operation<AnalyzeResult> operation = await client.AnalyzeDocumentAsync(WaitUntil.Completed, "prebuilt-invoice", invoiceUri);
+AnalyzeDocumentContent content = new AnalyzeDocumentContent()
+{
+    UrlSource = invoiceUri
+};
+
+Operation<AnalyzeResult> operation = await client.AnalyzeDocumentAsync(WaitUntil.Completed, "prebuilt-invoice", content);
 
 AnalyzeResult result = operation.Value;
 
 
@@ -6,7 +6,7 @@ author: laujan
 manager: nitinme
 ms.service: azure-ai-document-intelligence
 ms.topic: include
-ms.date: 03/25/2024
+ms.date: 05/01/2024
 ms.author: lajanuar
 ---
 <!-- markdownlint-disable MD025 -->
@@ -284,18 +284,25 @@ public class DocIntelligence {
 
   public static void main(String[] args) {
 
-    // create your `DocumentAnalysisClient` instance and `AzureKeyCredential` variable
+    // create your `DocumentIntelligenceClient` instance and `AzureKeyCredential` variable
     DocumentIntelligenceClient client = new DocumentIntelligenceClientBuilder()
       .credential(new AzureKeyCredential(key))
       .endpoint(endpoint)
       .buildClient();
 
     // sample document
-    String documentUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf";
     String modelId = "prebuilt-layout";
+    String documentUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf";
 
-    SyncPoller <AnalyzeResultOperation, AnalyzeResultOperation> analyzeLayoutResultPoller =
-      client.beginAnalyzeDocument(modelId, documentUrl);
+    SyncPoller <AnalyzeResultOperation, AnalyzeResultOperation> analyzeLayoutPoller =
+      client.beginAnalyzeDocument(modelId,
+          null,
+          null,
+          null,
+          null,
+          null,
+          null,
+          new AnalyzeDocumentRequest().setUrlSource(documentUrl));
 
     AnalyzeResult analyzeLayoutResult = analyzeLayoutPoller.getFinalResult().getAnalyzeResult();
 
@@ -658,25 +665,27 @@ public class DocIntelligence {
 
   public static void main(String[] args) {
 
-    // create your `DocumentAnalysisClient` instance and `AzureKeyCredential` variable
-    DocumentIntelligenceClient client = new DocumentIntelligenceClientBuilder()
-      .credential(new AzureKeyCredential(key))
-      .endpoint(endpoint)
-      .buildClient();
-
     // sample document
     String modelId = "prebuilt-invoice";
     String invoiceUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-invoice.pdf";
 
     public static void main(final String[] args) throws IOException {
+
       // Instantiate a client that will be used to call the service.
       DocumentIntelligenceClient client = new DocumentIntelligenceClientBuilder()
-        .credential(new AzureKeyCredential("{key}"))
-        .endpoint("https://{endpoint}.cognitiveservices.azure.com/")
+        .credential(new AzureKeyCredential(key))
+        .endpoint(endpoint)
         .buildClient();
 
-      SyncPoller < OperationResult, AnalyzeResult > analyzeLayoutResultPoller =
-        client.beginAnalyzeDocument(modelId, invoiceUrl);
+      SyncPoller<AnalyzeResultOperation, AnalyzeResultOperation > analyzeInvoicesPoller =
+        client.beginAnalyzeDocument(modelId, 
+            null,
+            null,
+            null,
+            null,
+            null,
+            null,
+            new AnalyzeDocumentRequest().setUrlSource(invoiceUrl));
 
       AnalyzeResult analyzeInvoiceResult = analyzeInvoicesPoller.getFinalResult().getAnalyzeResult();
 
 
@@ -6,7 +6,7 @@ author: laujan
 manager: nitinme
 ms.service: azure-ai-document-intelligence
 ms.topic: include
-ms.date: 03/25/2024
+ms.date: 05/02/2024
 ms.author: lajanuar
 ---
 <!-- markdownlint-disable MD025 -->
@@ -146,47 +146,44 @@ Extract text, selection marks, text styles, table structures, and bounding regio
 :::moniker range="doc-intel-4.0.0"
 
 ```javascript
-    const { DocumentIntelligenceClient } = require("@azure-rest/ai-document-intelligence");
-    const  { AzureKeyCredential } = require("@azure/core-auth");
+    const DocumentIntelligenceClient = require("@azure-rest/ai-document-intelligence");
 
     // set `<your-key>` and `<your-endpoint>` variables with the values from the Azure portal.
     const key = "<your-key";
     const endpoint = "<your-endpoint>";
 
     // sample document
-  const formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"
+    const formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"
 
-async function main() {
-    const client = DocumentIntelligenceClient(endpoint, new AzureKeyCredential(key));
+   async function main() {
+    const client = DocumentIntelligenceClient(endpoint, {key:key},);
 
-    const poller = await client.beginAnalyzeDocument("prebuilt-layout", formUrl);
 
-const {
-        pages,
-        tables
-    } = await poller.pollUntilDone();
+    const initialResponse = await client
+      .path("/documentModels/{modelId}:analyze", "prebuilt-layout")
+      .post({
+        contentType: "application/json",
+        body: {
+          urlSource: formUrl
+        },
+       });
 
-    if (pages.length <= 0) {
-        console.log("No pages were extracted from the document.");
-    } else {
-        console.log("Pages:");
-        for (const page of pages) {
-            console.log("- Page", page.pageNumber, `(unit: ${page.unit})`);
-            console.log(`  ${page.width}x${page.height}, angle: ${page.angle}`);
-            console.log(`  ${page.lines.length} lines, ${page.words.length} words`);
-        }
-    }
+    const poller = await getLongRunningPoller(client, initialResponse);
+    const analyzeResult = (await poller.pollUntilDone()).body.analyzeResult;
 
-    if (tables.length <= 0) {
-        console.log("No tables were extracted from the document.");
-    } else {
-        console.log("Tables:");
-        for (const table of tables) {
-            console.log(
-                `- Extracted table: ${table.columnCount} columns, ${table.rowCount} rows (${table.cells.length} cells)`
-            );
-        }
+    const documents = analyzeResult?.documents;
+
+    const document = documents && documents[0];
+    if (!document) {
+    throw new Error("Expected at least one document in the result.");
     }
+
+    console.log(
+    "Extracted document:",
+    document.docType,
+    `(confidence: ${document.confidence || "<undefined>"})`,
+    );
+    console.log("Fields:", document.fields);
 }
 
 main().catch((error) => {
@@ -312,8 +309,7 @@ In this example, we analyze an invoice using the **prebuilt-invoice** model.
 
 ```javascript
 
-const { DocumentIntelligenceClient } = require("@azure-rest/ai-document-intelligence");
-const  { AzureKeyCredential } = require("@azure/core-auth");
+const DocumentIntelligenceClient = require("@azure-rest/ai-document-intelligence");
 
     // set `<your-key>` and `<your-endpoint>` variables with the values from the Azure portal.
     const key = "<your-key>";
@@ -323,32 +319,44 @@ const  { AzureKeyCredential } = require("@azure/core-auth");
     invoiceUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-invoice.pdf"
 
 async function main() {
-    const client = DocumentIntelligenceClient(endpoint, new AzureKeyCredential(key));
 
-    const poller = await client.beginAnalyzeDocument("prebuilt-invoice", invoiceUrl);
-if (pages.length <= 0) {
-        console.log("No pages were extracted from the document.");
-    } else {
-        console.log("Pages:");
-        for (const page of pages) {
-            console.log("- Page", page.pageNumber, `(unit: ${page.unit})`);
-            console.log(`  ${page.width}x${page.height}, angle: ${page.angle}`);
-            console.log(`  ${page.lines.length} lines, ${page.words.length} words`);
-        }
-    }
+    const client = DocumentIntelligenceClient(endpoint, {key: key},
+     );
 
-    if (tables.length <= 0) {
-        console.log("No tables were extracted from the document.");
+    const initialResponse = await client
+    .path("/documentModels/{modelId}:analyze", "prebuilt-invoice")
+    .post({
+      contentType: "application/json",
+      body: {
+        // The Document Intelligence service will access the URL to the invoice image and extract data from it
+        urlSource: invoiceUrl,
+      },
+    });
+
+
+    const poller = await getLongRunningPoller(client, initialResponse);
+
+    poller.onProgress((state) => console.log("Operation:", state.result, state.status));
+    const analyzeResult = (await poller.pollUntilDone()).body.analyzeResult;
+
+    const documents = analyzeResult?.documents;
+
+    const result = documents && documents[0];
+    if (result) {
+      console.log(result.fields);
     } else {
-        console.log("Tables:");
-        for (const table of tables) {
-            console.log(
-                `- Extracted table: ${table.columnCount} columns, ${table.rowCount} rows (${table.cells.length} cells)`
-            );
-        }
+      throw new Error("Expected at least one invoice in the result.");
     }
+
+console.log(
+    "Extracted invoice:",
+    document.docType,
+    `(confidence: ${document.confidence || "<undefined>"})`,
+  );
+  console.log("Fields:", document.fields);
 }
 
+
 main().catch((error) => {
     console.error("An error occurred:", error);
     process.exit(1);
 
@@ -6,7 +6,7 @@ author: laujan
 manager: nitinme
 ms.service: azure-ai-document-intelligence
 ms.topic: include
-ms.date: 03/25/2024
+ms.date: 05/01/2024
 ms.author: lajanuar
 ---
 <!-- markdownlint-disable MD025 -->
@@ -135,6 +135,7 @@ import os
 from azure.core.credentials import AzureKeyCredential
 from azure.ai.documentintelligence import DocumentIntelligenceClient
 from azure.ai.documentintelligence.models import AnalyzeResult
+from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
 
 # set `<your-endpoint>` and `<your-key>` variables with the values from the Azure portal
 endpoint = "<your-endpoint>"
@@ -168,12 +169,8 @@ def analyze_layout():
     )
 
     poller = document_intelligence_client.begin_analyze_document(
-        "prebuilt-layout", formUrl
-    )
-
-    analyze_request = AnalyzeDocumentRequest(
-        url_source=formUrl
-    )
+        "prebuilt-layout", AnalyzeDocumentRequest(url_source=formUrl
+    ))
 
     result: AnalyzeResult = poller.result()
 
@@ -548,6 +545,9 @@ Analyze and extract common fields from specific document types using a prebuilt
 import os
 from azure.core.credentials import AzureKeyCredential
 from azure.ai.documentintelligence import DocumentIntelligenceClient
+from azure.ai.documentintelligence.models import AnalyzeResult
+from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
+
 
 
 # set `<your-endpoint>` and `<your-key>` variables with the values from the Azure portal
@@ -564,7 +564,7 @@ def analyze_invoice():
     )
 
     poller = document_intelligence_client.begin_analyze_document(
-        "prebuilt-invoice", invoiceUrl
+        "prebuilt-invoice", AnalyzeDocumentRequest(url_source=invoiceUrl
     )
     invoices = poller.result()
 
 
@@ -3,7 +3,7 @@ title: Azure OpenAI Service provisioned throughput
 description: Learn about provisioned throughput and Azure OpenAI. 
 ms.service: azure-ai-openai
 ms.topic: conceptual 
-ms.date: 04/29/2024 
+ms.date: 05/02/2024 
 manager: nitinme
 author: mrbullwinkle #ChrisHMSFT
 ms.author: mbullwin #chrhoder
@@ -80,22 +80,30 @@ PTUs represent an amount of model processing capacity. Similar to your computer
 
 A few high-level considerations:
 - Generations require more capacity than prompts
-- Larger calls are progressively more expensive to compute. For example, 100 calls of with a 1000 token prompt size will require less capacity than 1 call with 100,000 tokens in the prompt. This also means that the distribution of these call shapes is important in overall throughput. Traffic patterns with a wide distribution that includes some very large calls may experience lower throughput per PTU than a narrower distribution with the same average prompt & completion token sizes. 
+- Larger calls are progressively more expensive to compute. For example, 100 calls with a 1,000-token prompt size will require less capacity than 1 call with 100,000 tokens in the prompt. This also means that the distribution of these call shapes is important in overall throughput. Traffic patterns with a wide distribution that includes some very large calls may experience lower throughput per PTU than a narrower distribution with the same average prompt & completion token sizes.
 
+### How utilization performance works
 
-### How utilization enforcement works
-Provisioned deployments provide you with an allocated amount of model processing capacity to run a given model. The `Provisioned-Managed Utilization` metric in Azure Monitor measures a given deployments utilization on 1-minute increments. Provisioned-Managed deployments are optimized to ensure that accepted calls are processed with a consistent model processing time (actual end-to-end latency is dependent on a call's characteristics). When the workload exceeds the allocated PTU capacity, the service returns a 429 HTTP status code until the utilization drops down below 100%. 
+Provisioned deployments provide you with an allocated amount of model processing capacity to run a given model.
 
+In Provisioned-Managed deployments, when capacity is exceeded, the API will immediately return a 429 HTTP status code. This enables the user to make decisions on how to manage their traffic. Users can redirect requests to a separate deployment, to a standard pay-as-you-go instance, or leverage a retry strategy to manage a given request. The service will continue to return the 429 HTTP status code until the utilization drops below 100%.
+
+### How can I monitor capacity?
+
+The [Provisioned-Managed Utilization V2 metric](../how-to/monitoring.md#azure-openai-metrics) in Azure Monitor measures a given deployment's utilization in 1-minute increments. Provisioned-Managed deployments are optimized to ensure that accepted calls are processed with a consistent model processing time (actual end-to-end latency is dependent on a call's characteristics).
 
 #### What should I do when I receive a 429 response?
 The 429 response isn't an error, but instead part of the design for telling users that a given deployment is fully utilized at a point in time. By providing a fast-fail response, you have control over how to handle these situations in a way that best fits your application requirements.
 
 The `retry-after-ms` and `retry-after` headers in the response tell you the time to wait before the next call will be accepted. How you choose to handle this response depends on your application requirements. Here are some considerations:
--	You can consider redirecting the traffic to other models, deployments or experiences. This option is the lowest-latency solution because the action can be taken as soon as you receive the 429 signal.
+-	You can consider redirecting the traffic to other models, deployments or experiences. This option is the lowest-latency solution because the action can be taken as soon as you receive the 429 signal. For ideas on how to effectively implement this pattern see this [community post](https://github.com/Azure/aoai-apim).
 -	If you're okay with longer per-call latencies, implement client-side retry logic. This option gives you the highest amount of throughput per PTU. The Azure OpenAI client libraries include built-in capabilities for handling retries.
 
 #### How does the service decide when to send a 429?
-We use a variation of the leaky bucket algorithm to maintain utilization below 100% while allowing some burstiness in the traffic. The high-level logic is as follows:
+
+In the Provisioned-Managed offering, each request is evaluated individually according to its prompt size, expected generation size, and model to determine its expected utilization. This is in contrast to pay-as-you-go deployments, which have a [custom rate limiting behavior](../how-to/quota.md) based on the estimated traffic load. For pay-as-you-go deployments, this can lead to HTTP 429s being generated prior to defined quota values being exceeded if traffic is not evenly distributed.
+
+For Provisioned-Managed, we use a variation of the leaky bucket algorithm to maintain utilization below 100% while allowing some burstiness in the traffic. The high-level logic is as follows:
 1.	Each customer has a set amount of capacity they can utilize on a deployment
 2.	When a request is made:
 
@@ -118,7 +126,7 @@ We use a variation of the leaky bucket algorithm to maintain utilization below 1
 
 #### How many concurrent calls can I have on my deployment?
 
-The number of concurrent calls you can achieve depends on each call's shape (prompt size, max_token parameter, etc). The service will continue to accept calls until the utilization reach 100%. To determine the approximate number of concurrent calls you can model out the maximum requests per minute for a particular call shape in the [capacity calculator](https://oai.azure.com/portal/calculator). If the system generates less than the number of samplings tokens like max_token, it will accept more requests.
+The number of concurrent calls you can achieve depends on each call's shape (prompt size, max_token parameter, etc.). The service will continue to accept calls until the utilization reaches 100%. To determine the approximate number of concurrent calls, you can model out the maximum requests per minute for a particular call shape in the [capacity calculator](https://oai.azure.com/portal/calculator). If the system generates fewer tokens than the number of sampling tokens requested (for example, max_token), it will accept more requests.
 
 ## Next steps