|
| 1 | +--- |
| 2 | +title: Summarizing the Text Content of PDF Documents using Text Analytics with Azure AI services |
| 3 | +description: Learn how to summarize the text content from a PDF document using RadPdfProcessing and Text Analytics with Azure AI services. |
| 4 | +type: how-to |
| 5 | +page_title: How to Summarize the Text Content of PDF Documents using Text Analytics with Azure AI services
| 6 | +slug: summarize-pdf-content |
| 7 | +tags: pdf, document, processing, text, summarize, summary, content, azure |
| 8 | +res_type: kb |
| 9 | +ticketid: 1657503 |
| 10 | +--- |
| 11 | + |
| 12 | +## Environment |
| 13 | + |
| 14 | +| Version | Product | Author | |
| 15 | +| ---- | ---- | ---- | |
| 16 | +| 2025.1.128| RadPdfProcessing |[Desislava Yordanova](https://www.telerik.com/blogs/author/desislava-yordanova)| |
| 17 | + |
| 18 | +## Description |
| 19 | + |
| 20 | +Learn how to summarize the text content of a PDF document using [Text Analytics with Azure AI services](https://learn.microsoft.com/en-us/azure/synapse-analytics/machine-learning/tutorial-text-analytics-use-mmlspark). |
| 21 | + |
| 22 | +## Solution |
| 23 | + |
| 24 | +Follow the steps: |
| 25 | + |
| 26 | +1\. Add the following **required** assemblies/NuGet packages to your project:
| 27 | + |
| 28 | +* [Azure.AI.TextAnalytics](https://www.nuget.org/packages/Azure.AI.TextAnalytics) |
| 29 | +* Telerik.Documents.Fixed |
| 30 | +* Telerik.Documents.Core |
| 31 | +* Telerik.Zip |
| 32 | + |
| 33 | +2\. Generate your Azure AI key and endpoint. For details, see [Get your credentials from your Azure AI services resource](https://learn.microsoft.com/en-us/azure/ai-services/use-key-vault?tabs=azure-cli&pivots=programming-language-csharp).
| 34 | + |
| 35 | + |
| 36 | + |
| 37 | +3\. [Extract the text content from a PDF document]({%slug extract-text-from-pdf%}). |
| 38 | + |
| 39 | +4\. Use the following custom implementation to summarize the text content extracted in step 3:
| 40 | + |
| 41 | +```csharp |
| 42 | + static async Task Main(string[] args) // async entry point: the summarization call is awaited instead of blocked on with .Result
| 43 | + {
| 44 | + Telerik.Windows.Documents.Fixed.FormatProviders.Pdf.PdfFormatProvider pdf_provider = new PdfFormatProvider();
| 45 | + Telerik.Windows.Documents.Fixed.FormatProviders.Text.TextFormatProvider text_provider = new TextFormatProvider();
| 46 | + Telerik.Windows.Documents.Fixed.Model.RadFixedDocument document = pdf_provider.Import(File.ReadAllBytes("PdfDocument.pdf"), TimeSpan.FromSeconds(10)); // import the PDF with a 10-second timeout
| 47 | + string documentTextContent = text_provider.Export(document); // plain-text content of the imported document
| 48 | + // NOTE: azure_key and azure_endpoint are not declared in this snippet; declare them with the credentials obtained in step 2.
| 49 | + AzureTextSummarizationProvider summarizationProvider = new AzureTextSummarizationProvider(azure_key, azure_endpoint);
| 50 | + string summary = await summarizationProvider.SummarizeText(documentTextContent);
| 51 | +
| 52 | + Console.WriteLine(summary);
| 53 | + }
| 54 | + |
| 55 | + public class AzureTextSummarizationProvider // Summarizes text with the extractive summarization action of Azure.AI.TextAnalytics.
| 56 | + {
| 57 | + private readonly string languageKey; // Azure AI key; assigned once in the constructor
| 58 | + private readonly string languageEndpoint; // Azure AI endpoint URL; assigned once in the constructor
| 59 | + /// <summary>Stores the Azure AI key and endpoint that <see cref="SummarizeText"/> uses to create its client.</summary>
| 60 | + public AzureTextSummarizationProvider(string azure_key, string azure_endpoint)
| 61 | + {
| 62 | + this.languageKey = azure_key;
| 63 | + this.languageEndpoint = azure_endpoint;
| 64 | + }
| 65 | + /// <summary>Runs extractive summarization on <paramref name="text"/> and returns a report: the operation details followed by the extracted sentences. Action-level and document-level errors are written into the report.</summary>
| 66 | + public async Task<string> SummarizeText(string text)
| 67 | + {
| 68 | + Azure.AzureKeyCredential credentials = new Azure.AzureKeyCredential(languageKey);
| 69 | + Uri endpoint = new Uri(languageEndpoint);
| 70 | +
| 71 | + Azure.AI.TextAnalytics.TextAnalyticsClient client = new Azure.AI.TextAnalytics.TextAnalyticsClient(endpoint, credentials);
| 72 | +
| 73 | + // Prepare analyze operation input. You can add multiple documents to this list and perform the same
| 74 | + // operation to all of them.
| 75 | + List<string> batchInput = new List<string>
| 76 | + {
| 77 | + text
| 78 | + };
| 79 | +
| 80 | + Azure.AI.TextAnalytics.TextAnalyticsActions actions = new Azure.AI.TextAnalytics.TextAnalyticsActions() // only the extractive summarization action is requested
| 81 | + {
| 82 | + ExtractiveSummarizeActions = [new Azure.AI.TextAnalytics.ExtractiveSummarizeAction()]
| 83 | + };
| 84 | +
| 85 | + // Start the analysis and wait for the operation to complete.
| 86 | + Azure.AI.TextAnalytics.AnalyzeActionsOperation operation = await client.StartAnalyzeActionsAsync(batchInput, actions);
| 87 | + await operation.WaitForCompletionAsync();
| 88 | +
| 89 | + System.Text.StringBuilder stringBuilder = new System.Text.StringBuilder();
| 90 | + // Write the operation details at the top of the report.
| 91 | + stringBuilder.AppendLine($"AnalyzeActions operation has completed");
| 92 | + stringBuilder.AppendLine();
| 93 | +
| 94 | + stringBuilder.AppendLine($"Created On : {operation.CreatedOn}");
| 95 | + stringBuilder.AppendLine($"Expires On : {operation.ExpiresOn}");
| 96 | + stringBuilder.AppendLine($"Id : {operation.Id}");
| 97 | + stringBuilder.AppendLine($"Status : {operation.Status}");
| 98 | +
| 99 | + stringBuilder.AppendLine();
| 100 | + // Append the results: walk every page, every summarize action result, and every document result.
| 101 | + await foreach (Azure.AI.TextAnalytics.AnalyzeActionsResult documentsInPage in operation.Value)
| 102 | + {
| 103 | + IReadOnlyCollection<Azure.AI.TextAnalytics.ExtractiveSummarizeActionResult> summaryResults = documentsInPage.ExtractiveSummarizeResults;
| 104 | +
| 105 | + foreach (Azure.AI.TextAnalytics.ExtractiveSummarizeActionResult summaryActionResults in summaryResults)
| 106 | + {
| 107 | + if (summaryActionResults.HasError) // the action failed: record the error and move on to the next action result
| 108 | + {
| 109 | + stringBuilder.AppendLine($" Error!");
| 110 | + stringBuilder.AppendLine($" Action error code: {summaryActionResults.Error.ErrorCode}.");
| 111 | + stringBuilder.AppendLine($" Message: {summaryActionResults.Error.Message}");
| 112 | + continue;
| 113 | + }
| 114 | +
| 115 | + foreach (Azure.AI.TextAnalytics.ExtractiveSummarizeResult documentResults in summaryActionResults.DocumentsResults)
| 116 | + {
| 117 | + if (documentResults.HasError) // this document failed: record the error and move on to the next document
| 118 | + {
| 119 | + stringBuilder.AppendLine($" Error!");
| 120 | + stringBuilder.AppendLine($" Document error code: {documentResults.Error.ErrorCode}.");
| 121 | + stringBuilder.AppendLine($" Message: {documentResults.Error.Message}");
| 122 | + continue;
| 123 | + }
| 124 | +
| 125 | + stringBuilder.AppendLine($" Extracted the following {documentResults.Sentences.Count} sentence(s):");
| 126 | + stringBuilder.AppendLine();
| 127 | +
| 128 | + foreach (Azure.AI.TextAnalytics.ExtractiveSummarySentence sentence in documentResults.Sentences)
| 129 | + {
| 130 | + stringBuilder.Append($"{sentence.Text} "); // sentences are joined on one line, each followed by a space
| 131 | + }
| 132 | + }
| 133 | + }
| 134 | + }
| 135 | +
| 136 | + string result = stringBuilder.ToString();
| 137 | +
| 138 | + return result;
| 139 | + }
| 140 | + }
| 141 | +``` |
| 142 | + |
| 143 | +## See Also |
| 144 | + |
| 145 | +- [Extracting Text from PDF Documents]({%slug extract-text-from-pdf%}) |
| 146 | +- [OcrFormatProvider]({%slug radpdfprocessing-formats-and-conversion-ocr-ocrformatprovider%}) |
| 147 | +- [TextFormatProvider]({%slug radpdfprocessing-formats-and-conversion-plain-text-textformatprovider%}) |
| 148 | + |
0 commit comments