Skip to content

Commit 9dd7d8a

Browse files
author
Changjian Wang
committed
Add samples for content understanding SDK
- Sample12_GetResultFile: Demonstrates how to retrieve keyframe images from video analysis operations. - Sample13_DeleteResult: Shows how to delete analysis results after they are no longer needed. - Sample14_CopyAnalyzer: Illustrates how to copy an analyzer within the same resource. - Sample15_GrantCopyAuth: Demonstrates granting copy authorization for cross-resource analyzer copying. - Sample16_CreateAnalyzerWithLabels: Shows how to create an analyzer with labeled training data from Azure Blob Storage.
1 parent aad4f2e commit 9dd7d8a

16 files changed

+2700
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
// Code generated by Microsoft (R) TypeSpec Code Generator.
4+
5+
package com.azure.ai.contentunderstanding.samples;
6+
7+
import com.azure.ai.contentunderstanding.ContentUnderstandingClient;
8+
import com.azure.ai.contentunderstanding.ContentUnderstandingClientBuilder;
9+
import com.azure.ai.contentunderstanding.models.ContentUnderstandingDefaults;
10+
import com.azure.core.credential.AzureKeyCredential;
11+
import com.azure.core.http.rest.RequestOptions;
12+
import com.azure.core.http.rest.Response;
13+
import com.azure.core.util.BinaryData;
14+
import com.azure.core.util.Configuration;
15+
import com.azure.identity.DefaultAzureCredentialBuilder;
16+
17+
/**
18+
* Sample demonstrating how to configure and manage default settings for Content Understanding service.
19+
* This sample shows:
20+
* 1. Getting current default configuration
21+
* 2. Updating default configuration
22+
* 3. Verifying the updated configuration
23+
*/
24+
public class Sample00_ConfigureDefaults {
25+
26+
public static void main(String[] args) {
27+
// BEGIN: com.azure.ai.contentunderstanding.sample00.buildClient
28+
String endpoint = Configuration.getGlobalConfiguration().get("CONTENTUNDERSTANDING_ENDPOINT");
29+
String key = System.getenv("AZURE_CONTENT_UNDERSTANDING_KEY");
30+
31+
// Build the client with appropriate authentication
32+
ContentUnderstandingClientBuilder builder = new ContentUnderstandingClientBuilder().endpoint(endpoint);
33+
34+
ContentUnderstandingClient client;
35+
if (key != null && !key.trim().isEmpty()) {
36+
// Use API key authentication
37+
client = builder.credential(new AzureKeyCredential(key)).buildClient();
38+
} else {
39+
// Use default Azure credential (for managed identity, Azure CLI, etc.)
40+
client = builder.credential(new DefaultAzureCredentialBuilder().build()).buildClient();
41+
}
42+
// END: com.azure.ai.contentunderstanding.sample00.buildClient
43+
44+
// Step 1: Get current defaults
45+
System.out.println("Getting current default configuration...");
46+
ContentUnderstandingDefaults currentDefaults = client.getDefaults();
47+
System.out.println("Current defaults retrieved successfully.");
48+
System.out.println("Current configuration: " + currentDefaults);
49+
50+
// Step 2: Update defaults with the same configuration (demonstrating update)
51+
System.out.println("\nUpdating default configuration...");
52+
53+
// Convert the current defaults to BinaryData for the update request
54+
BinaryData defaultsBody = BinaryData.fromObject(currentDefaults);
55+
RequestOptions requestOptions = new RequestOptions();
56+
57+
// Update defaults with the configuration
58+
Response<BinaryData> updateResponse = client.updateDefaultsWithResponse(defaultsBody, requestOptions);
59+
60+
if (updateResponse.getStatusCode() == 200 || updateResponse.getStatusCode() == 201) {
61+
System.out.println("Defaults updated successfully.");
62+
System.out.println("Status code: " + updateResponse.getStatusCode());
63+
} else {
64+
System.err.println("Failed to update defaults. Status code: " + updateResponse.getStatusCode());
65+
}
66+
67+
// Step 3: Verify the updated configuration
68+
System.out.println("\nVerifying updated configuration...");
69+
ContentUnderstandingDefaults updatedDefaults = client.getDefaults();
70+
System.out.println("Updated defaults verified successfully.");
71+
System.out.println("Updated configuration: " + updatedDefaults);
72+
73+
System.out.println("\nConfiguration management completed.");
74+
}
75+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
// Code generated by Microsoft (R) TypeSpec Code Generator.
4+
5+
package com.azure.ai.contentunderstanding.samples;
6+
7+
import com.azure.ai.contentunderstanding.ContentUnderstandingClient;
8+
import com.azure.ai.contentunderstanding.ContentUnderstandingClientBuilder;
9+
import com.azure.ai.contentunderstanding.models.AnalyzeResult;
10+
import com.azure.ai.contentunderstanding.models.ContentAnalyzerAnalyzeOperationStatus;
11+
import com.azure.ai.contentunderstanding.models.DocumentContent;
12+
import com.azure.ai.contentunderstanding.models.DocumentPage;
13+
import com.azure.ai.contentunderstanding.models.DocumentTable;
14+
import com.azure.ai.contentunderstanding.models.MediaContent;
15+
import com.azure.core.credential.AzureKeyCredential;
16+
import com.azure.core.util.BinaryData;
17+
import com.azure.core.util.Configuration;
18+
import com.azure.core.util.polling.SyncPoller;
19+
import com.azure.identity.DefaultAzureCredentialBuilder;
20+
21+
import java.io.IOException;
22+
import java.nio.file.Files;
23+
import java.nio.file.Path;
24+
import java.nio.file.Paths;
25+
26+
/**
27+
* Sample demonstrating how to analyze binary documents using Content Understanding service.
28+
* This sample shows:
29+
* 1. Loading a binary file (PDF)
30+
* 2. Analyzing the document
31+
* 3. Extracting markdown content
32+
* 4. Accessing document properties (pages, tables, etc.)
33+
*/
34+
public class Sample01_AnalyzeBinary {
35+
36+
public static void main(String[] args) throws IOException {
37+
// BEGIN: com.azure.ai.contentunderstanding.sample01.buildClient
38+
String endpoint = Configuration.getGlobalConfiguration().get("CONTENTUNDERSTANDING_ENDPOINT");
39+
String key = System.getenv("AZURE_CONTENT_UNDERSTANDING_KEY");
40+
41+
// Build the client with appropriate authentication
42+
ContentUnderstandingClientBuilder builder = new ContentUnderstandingClientBuilder().endpoint(endpoint);
43+
44+
ContentUnderstandingClient client;
45+
if (key != null && !key.trim().isEmpty()) {
46+
// Use API key authentication
47+
client = builder.credential(new AzureKeyCredential(key)).buildClient();
48+
} else {
49+
// Use default Azure credential (for managed identity, Azure CLI, etc.)
50+
client = builder.credential(new DefaultAzureCredentialBuilder().build()).buildClient();
51+
}
52+
// END: com.azure.ai.contentunderstanding.sample01.buildClient
53+
54+
// Load the sample file
55+
String filePath = "src/test/resources/sample_invoice.pdf";
56+
Path path = Paths.get(filePath);
57+
58+
byte[] fileBytes;
59+
BinaryData binaryData;
60+
boolean hasRealFile = Files.exists(path);
61+
62+
// Check if sample file exists
63+
if (!hasRealFile) {
64+
System.out.println("⚠️ Sample file not found at " + filePath);
65+
System.out.println("Creating a minimal test PDF for demonstration...");
66+
// Create a minimal valid PDF for testing
67+
String pdfContent
68+
= "%PDF-1.4\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj 3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Parent 2 0 R/Resources<<>>>>endobj\nxref\n0 4\n0000000000 65535 f\n0000000009 00000 n\n0000000056 00000 n\n0000000115 00000 n\ntrailer<</Size 4/Root 1 0 R>>\nstartxref\n203\n%%EOF";
69+
fileBytes = pdfContent.getBytes();
70+
} else {
71+
fileBytes = Files.readAllBytes(path);
72+
}
73+
74+
binaryData = BinaryData.fromBytes(fileBytes);
75+
76+
// BEGIN:ContentUnderstandingAnalyzeBinaryAsync
77+
SyncPoller<ContentAnalyzerAnalyzeOperationStatus, AnalyzeResult> operation
78+
= client.beginAnalyzeBinary("prebuilt-documentSearch", "application/pdf", binaryData, null, null, null);
79+
80+
AnalyzeResult result = operation.getFinalResult();
81+
// END:ContentUnderstandingAnalyzeBinaryAsync
82+
83+
System.out.println("Analysis operation completed");
84+
System.out.println("Analysis result contains "
85+
+ (result.getContents() != null ? result.getContents().size() : 0) + " content(s)");
86+
87+
// BEGIN:ContentUnderstandingExtractMarkdown
88+
// A PDF file has only one content element even if it contains multiple pages
89+
MediaContent content = null;
90+
if (result.getContents() == null || result.getContents().isEmpty()) {
91+
System.out.println("(No content returned from analysis)");
92+
} else {
93+
content = result.getContents().get(0);
94+
if (content.getMarkdown() != null && !content.getMarkdown().isEmpty()) {
95+
System.out.println(content.getMarkdown());
96+
} else {
97+
System.out.println("(No markdown content available)");
98+
}
99+
}
100+
// END:ContentUnderstandingExtractMarkdown
101+
102+
if (hasRealFile && content != null && content.getMarkdown() != null && !content.getMarkdown().isEmpty()) {
103+
System.out
104+
.println("Markdown content extracted successfully (" + content.getMarkdown().length() + " characters)");
105+
} else {
106+
System.out
107+
.println("⚠️ Skipping markdown content validation (using minimal test PDF or no markdown available)");
108+
}
109+
110+
// BEGIN:ContentUnderstandingAccessDocumentProperties
111+
// Check if this is document content to access document-specific properties
112+
if (content instanceof DocumentContent) {
113+
DocumentContent documentContent = (DocumentContent) content;
114+
System.out.println("Document type: "
115+
+ (documentContent.getMimeType() != null ? documentContent.getMimeType() : "(unknown)"));
116+
System.out.println("Start page: " + documentContent.getStartPageNumber());
117+
System.out.println("End page: " + documentContent.getEndPageNumber());
118+
System.out.println(
119+
"Total pages: " + (documentContent.getEndPageNumber() - documentContent.getStartPageNumber() + 1));
120+
121+
// Check for pages
122+
if (documentContent.getPages() != null && !documentContent.getPages().isEmpty()) {
123+
System.out.println("Number of pages: " + documentContent.getPages().size());
124+
for (DocumentPage page : documentContent.getPages()) {
125+
String unit = documentContent.getUnit() != null ? documentContent.getUnit().toString() : "units";
126+
System.out.println(" Page " + page.getPageNumber() + ": " + page.getWidth() + " x "
127+
+ page.getHeight() + " " + unit);
128+
}
129+
}
130+
131+
// Check for tables
132+
if (documentContent.getTables() != null && !documentContent.getTables().isEmpty()) {
133+
System.out.println("Number of tables: " + documentContent.getTables().size());
134+
int tableCounter = 1;
135+
for (DocumentTable table : documentContent.getTables()) {
136+
System.out.println(" Table " + tableCounter + ": " + table.getRowCount() + " rows x "
137+
+ table.getColumnCount() + " columns");
138+
tableCounter++;
139+
}
140+
}
141+
} else {
142+
System.out.println("Content is MediaContent (not document-specific), skipping document properties");
143+
}
144+
// END:ContentUnderstandingAccessDocumentProperties
145+
146+
System.out.println("\nBinary document analysis completed successfully");
147+
}
148+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
// Code generated by Microsoft (R) TypeSpec Code Generator.
4+
5+
package com.azure.ai.contentunderstanding.samples;
6+
7+
import com.azure.ai.contentunderstanding.ContentUnderstandingClient;
8+
import com.azure.ai.contentunderstanding.ContentUnderstandingClientBuilder;
9+
import com.azure.ai.contentunderstanding.models.AnalyzeInput;
10+
import com.azure.ai.contentunderstanding.models.AnalyzeResult;
11+
import com.azure.ai.contentunderstanding.models.ContentAnalyzerAnalyzeOperationStatus;
12+
import com.azure.ai.contentunderstanding.models.DocumentContent;
13+
import com.azure.ai.contentunderstanding.models.DocumentPage;
14+
import com.azure.ai.contentunderstanding.models.DocumentTable;
15+
import com.azure.ai.contentunderstanding.models.MediaContent;
16+
import com.azure.core.credential.AzureKeyCredential;
17+
import com.azure.core.util.Configuration;
18+
import com.azure.core.util.polling.SyncPoller;
19+
import com.azure.identity.DefaultAzureCredentialBuilder;
20+
21+
import java.util.Arrays;
22+
23+
/**
24+
* Sample demonstrating how to analyze documents from URL using Content Understanding service.
25+
* This sample shows:
26+
* 1. Providing a URL to a document
27+
* 2. Analyzing the document
28+
* 3. Extracting markdown content
29+
* 4. Accessing document properties (pages, tables, etc.)
30+
*/
31+
public class Sample02_AnalyzeUrl {
32+
33+
public static void main(String[] args) {
34+
// BEGIN: com.azure.ai.contentunderstanding.sample02.buildClient
35+
String endpoint = Configuration.getGlobalConfiguration().get("CONTENTUNDERSTANDING_ENDPOINT");
36+
String key = System.getenv("AZURE_CONTENT_UNDERSTANDING_KEY");
37+
38+
// Build the client with appropriate authentication
39+
ContentUnderstandingClientBuilder builder = new ContentUnderstandingClientBuilder().endpoint(endpoint);
40+
41+
ContentUnderstandingClient client;
42+
if (key != null && !key.trim().isEmpty()) {
43+
// Use API key authentication
44+
client = builder.credential(new AzureKeyCredential(key)).buildClient();
45+
} else {
46+
// Use default Azure credential (for managed identity, Azure CLI, etc.)
47+
client = builder.credential(new DefaultAzureCredentialBuilder().build()).buildClient();
48+
}
49+
// END: com.azure.ai.contentunderstanding.sample02.buildClient
50+
51+
// BEGIN:ContentUnderstandingAnalyzeUrlAsync
52+
// Using a publicly accessible sample file from Azure-Samples GitHub repository
53+
String uriSource
54+
= "https://raw.githubusercontent.com/Azure-Samples/azure-ai-content-understanding-dotnet/main/ContentUnderstanding.Common/data/invoice.pdf";
55+
56+
AnalyzeInput input = new AnalyzeInput();
57+
input.setUrl(uriSource);
58+
59+
SyncPoller<ContentAnalyzerAnalyzeOperationStatus, AnalyzeResult> operation
60+
= client.beginAnalyze("prebuilt-documentSearch", null, null, Arrays.asList(input), null);
61+
62+
AnalyzeResult result = operation.getFinalResult();
63+
// END:ContentUnderstandingAnalyzeUrlAsync
64+
65+
System.out.println("Analysis operation completed");
66+
System.out.println("Analysis result contains "
67+
+ (result.getContents() != null ? result.getContents().size() : 0) + " content(s)");
68+
69+
// A PDF file has only one content element even if it contains multiple pages
70+
MediaContent content = null;
71+
if (result.getContents() == null || result.getContents().isEmpty()) {
72+
System.out.println("(No content returned from analysis)");
73+
} else {
74+
content = result.getContents().get(0);
75+
if (content.getMarkdown() != null && !content.getMarkdown().isEmpty()) {
76+
System.out.println(content.getMarkdown());
77+
} else {
78+
System.out.println("(No markdown content available)");
79+
}
80+
}
81+
82+
if (content != null && content.getMarkdown() != null && !content.getMarkdown().isEmpty()) {
83+
System.out
84+
.println("Markdown content extracted successfully (" + content.getMarkdown().length() + " characters)");
85+
}
86+
87+
// Check if this is document content to access document-specific properties
88+
if (content instanceof DocumentContent) {
89+
DocumentContent documentContent = (DocumentContent) content;
90+
System.out.println("Document type: "
91+
+ (documentContent.getMimeType() != null ? documentContent.getMimeType() : "(unknown)"));
92+
System.out.println("Start page: " + documentContent.getStartPageNumber());
93+
System.out.println("End page: " + documentContent.getEndPageNumber());
94+
System.out.println(
95+
"Total pages: " + (documentContent.getEndPageNumber() - documentContent.getStartPageNumber() + 1));
96+
97+
// Check for pages
98+
if (documentContent.getPages() != null && !documentContent.getPages().isEmpty()) {
99+
System.out.println("Number of pages: " + documentContent.getPages().size());
100+
for (DocumentPage page : documentContent.getPages()) {
101+
String unit = documentContent.getUnit() != null ? documentContent.getUnit().toString() : "units";
102+
System.out.println(" Page " + page.getPageNumber() + ": " + page.getWidth() + " x "
103+
+ page.getHeight() + " " + unit);
104+
}
105+
}
106+
107+
// Check for tables
108+
if (documentContent.getTables() != null && !documentContent.getTables().isEmpty()) {
109+
System.out.println("Number of tables: " + documentContent.getTables().size());
110+
int tableCounter = 1;
111+
for (DocumentTable table : documentContent.getTables()) {
112+
System.out.println(" Table " + tableCounter + ": " + table.getRowCount() + " rows x "
113+
+ table.getColumnCount() + " columns");
114+
tableCounter++;
115+
}
116+
}
117+
} else {
118+
System.out.println("Content is MediaContent (not document-specific), skipping document properties");
119+
}
120+
121+
System.out.println("\nURL document analysis completed successfully");
122+
}
123+
}

0 commit comments

Comments
 (0)