Skip to content

Commit ffb6ef9

Browse files
authored
Merge pull request #657 from telerik/new-kb-wordsprocessing-preventing-table-row-splitting-html-pdf-9075317d1d524354ad872db3aa8c5c43
Added new kb article wordsprocessing-preventing-table-row-splitting-html-pdf
2 parents 6c6c85e + 0d562b2 commit ffb6ef9

File tree

2 files changed

+161
-0
lines changed

2 files changed

+161
-0
lines changed
104 KB
Loading
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
---
2+
title: Preventing Table Row Splitting Across Pages When Converting HTML to PDF
3+
description: Learn how to prevent table rows from splitting across pages when converting HTML content to PDF using Telerik Document Processing.
4+
type: how-to
5+
page_title: Avoiding Table Row Splitting in HTML to PDF Conversion
6+
meta_title: Avoiding Table Row Splitting in HTML to PDF Conversion
7+
slug: wordsprocessing-preventing-table-row-splitting-html-pdf
8+
tags: words, processing, telerik, document, html, pdf, conversion, table, row, splitting, split, flow, word, measure, calculate
9+
res_type: kb
10+
ticketid: 1700721
11+
---
12+
13+
## Environment
14+
| Version | Product | Author |
15+
| --- | --- | ---- |
16+
| 2025.3.806 | RadWordsProcessing |[Yoan Karamanov](https://www.telerik.com/blogs/author/yoan-karamanov)|
17+
18+
## Description
19+
20+
This article shows how to use the [WordsProcessing]({%slug radwordsprocessing-overview%}) and [PdfProcessing]({%slug radpdfprocessing-overview%}) libraries to convert HTML that contains tables to PDF without splitting table rows across pages.
21+
22+
This knowledge base article also answers the following questions:
23+
- How can I prevent table rows from splitting across pages during HTML to PDF conversion?
24+
- How do I handle uneven row heights in tables when exporting to PDF?
25+
- How can I ensure HTML table rows are preserved on a single page during HTML to PDF conversion?
26+
27+
## Solution
28+
29+
To prevent table rows from splitting across pages, manually recreate the PDF table from scratch by copying the HTML table content to a new PDF table. Use the **Measure** method to check whether the table exceeds the page boundary. If it does, create a new page and continue building the table.
30+
31+
### Steps to Implement
32+
33+
1. **Set up the HTML import settings:** Use the [HtmlFormatProvider]({%slug radwordsprocessing-formats-and-conversion-html-htmlformatprovider%}) and handle the [LoadImageFromUri]({%slug radwordsprocessing-formats-and-conversion-html-settings%}#loadimagefromuri-and-loadstylesheetfromuri-events) event to resolve the images referenced in the HTML content.
34+
35+
2. **Load the HTML document:** Import the HTML content into a [RadFlowDocument]({%slug radwordsprocessing-model-radflowdocument%}) object.
36+
37+
3. **Extract rows from the HTML table:** Find the first table in the imported document and collect its rows into a list.
38+
39+
4. **Create and format a new PDF table:** For each page, create a new table, apply the desired formatting, and add rows only as long as they fit within the page boundaries.
40+
41+
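5. **Check row measurements:** Before committing each row, add it to a temporary test table together with the rows already placed on the page, and use the **Measure** method to verify whether the table would exceed the page height. If it would, move that row and the remaining rows to a new page.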
42+
43+
6. **Export the PDF:** Use the [PdfFormatProvider]({%slug radpdfprocessing-formats-and-conversion-pdf-pdfformatprovider%}) to save the final PDF document.
44+
45+
![HTML to PDF Table Without Split Rows](images/html-pdf-table-no-split-rows.png)
46+
47+
```csharp
48+
// Input HTML and output PDF locations; relative paths resolve against the current working directory.
const string InputHtmlPath = "..\\..\\..\\input.html";
const string OutputPdfPath = "..\\..\\..\\output.pdf";

/// <summary>
/// Imports the input HTML file, rebuilds its first table as one PDF table per page so that
/// no table row is split across pages, exports the PDF and opens it.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <exception cref="System.InvalidOperationException">The imported document contains no table.</exception>
static void Main(string[] args)
{
    HtmlFormatProvider htmlFormatProvider = new HtmlFormatProvider();
    HtmlImportSettings importSettings = new HtmlImportSettings();

    importSettings.LoadImageFromUri += (s, e) =>
    {
        // WebClient is IDisposable, so release it as soon as the download completes.
        using (var client = new System.Net.WebClient())
        {
            byte[] data = client.DownloadData(e.Uri);

            // Path.GetExtension returns "" when the URI has no extension; TrimStart handles that
            // case, whereas Substring(1) would throw ArgumentOutOfRangeException.
            e.SetImageInfo(data, Path.GetExtension(e.Uri).TrimStart('.'));
        }
    };

    htmlFormatProvider.ImportSettings = importSettings;

    RadFlowDocument htmlDocument;
    using (var input = File.OpenRead(InputHtmlPath))
    {
        htmlDocument = htmlFormatProvider.Import(input, null);
    }

    // Only the first table of the document is converted.
    var htmlTable = htmlDocument
        .EnumerateChildrenOfType<Telerik.Windows.Documents.Flow.Model.Table>()
        .FirstOrDefault();

    if (htmlTable == null)
    {
        // Fail with a clear message instead of a NullReferenceException on ".Rows".
        throw new System.InvalidOperationException("No table was found in '" + InputHtmlPath + "'.");
    }

    var htmlRows = htmlTable.Rows.ToList();

    var mainPdfDocument = new RadFixedDocument();
    int currentRowIndex = 0;

    // Each iteration of the outer loop fills exactly one PDF page.
    while (currentRowIndex < htmlRows.Count)
    {
        var pdfPage = mainPdfDocument.Pages.AddPage();
        var pdfTable = CreateNewTable();
        int rowsAdded = 0;

        while (currentRowIndex < htmlRows.Count)
        {
            // Build a scratch table holding the rows already placed on this page plus the
            // candidate row, so the candidate can be measured before it is committed.
            // rowsAdded never exceeds currentRowIndex, so the start index is never negative.
            var testTable = CreateNewTable();
            for (int i = currentRowIndex - rowsAdded; i <= currentRowIndex; i++)
            {
                AddRowToTable(testTable, htmlRows[i], pdfPage);
            }

            // A page always accepts its first row, even an oversized one; otherwise a row
            // taller than the page would never be placed and the loop would not terminate.
            if (rowsAdded > 0 && testTable.Measure().Height > pdfPage.Size.Height)
            {
                break;
            }

            AddRowToTable(pdfTable, htmlRows[currentRowIndex], pdfPage);
            rowsAdded++;
            currentRowIndex++;
        }

        new FixedContentEditor(pdfPage).DrawTable(pdfTable);
    }

    // File.Create replaces any existing file, so no separate delete step is needed.
    using (var output = File.Create(OutputPdfPath))
    {
        new PdfFormatProvider().Export(mainPdfDocument, output, null);
    }

    // Open the result with the shell-associated PDF viewer.
    Process.Start(new ProcessStartInfo(OutputPdfPath) { UseShellExecute = true });
}
109+
110+
/// <summary>
/// Creates an empty PDF table whose default cell borders are black (RGB 0, 0, 0) on all four
/// sides and whose margin is <c>new Thickness(10)</c>.
/// </summary>
/// <returns>A new, empty table ready to receive rows.</returns>
private static Telerik.Windows.Documents.Fixed.Model.Editing.Tables.Table CreateNewTable()
{
    var cellBorder = new Border(1, new RgbColor(0, 0, 0));

    var table = new Telerik.Windows.Documents.Fixed.Model.Editing.Tables.Table();

    // The same border instance is used for each of the four constructor arguments.
    table.DefaultCellProperties.Borders =
        new Telerik.Windows.Documents.Fixed.Model.Editing.Tables.TableCellBorders(cellBorder, cellBorder, cellBorder, cellBorder);
    table.Margin = new Thickness(10);

    return table;
}
119+
120+
/// <summary>
/// Appends a new row to <paramref name="pdfTable"/> and copies into it the paragraph content
/// (images, text runs and breaks) of every cell of <paramref name="htmlRow"/>.
/// </summary>
/// <param name="pdfTable">The PDF table that receives the new row.</param>
/// <param name="htmlRow">The imported flow-document row to copy from.</param>
/// <param name="pdfPage">The target page; its width is used to size the cells.</param>
private static void AddRowToTable(Telerik.Windows.Documents.Fixed.Model.Editing.Tables.Table pdfTable, TableRow htmlRow, RadFixedPage pdfPage)
{
    var targetRow = pdfTable.Rows.AddTableRow();

    for (int cellIndex = 0; cellIndex < htmlRow.Cells.Count; cellIndex++)
    {
        var targetCell = targetRow.Cells.AddTableCell();

        // The first column takes 5% of the page width, every other column 25%.
        double widthFactor;
        if (cellIndex == 0)
        {
            widthFactor = 0.05;
        }
        else
        {
            widthFactor = 0.25;
        }

        targetCell.PreferredWidth = pdfPage.Size.Width * widthFactor;
        targetCell.Padding = new Thickness(5);

        // The background alternates with the parity of the table's current row count,
        // which already includes the row being filled.
        bool rowCountIsEven = pdfTable.Rows.Count % 2 == 0;
        if (rowCountIsEven)
        {
            targetCell.Background = new RgbColor(249, 249, 249);
        }
        else
        {
            targetCell.Background = new RgbColor(255, 255, 255);
        }

        // Only paragraph blocks are copied; other block types in the source cell are skipped.
        var sourceParagraphs = htmlRow.Cells[cellIndex].Blocks.OfType<Paragraph>();
        foreach (var sourceParagraph in sourceParagraphs)
        {
            var targetBlock = targetCell.Blocks.AddBlock();
            targetBlock.HorizontalAlignment = Telerik.Windows.Documents.Fixed.Model.Editing.Flow.HorizontalAlignment.Left;
            targetBlock.VerticalAlignment = Telerik.Windows.Documents.Fixed.Model.Editing.Flow.VerticalAlignment.Center;

            foreach (var inline in sourceParagraph.Inlines)
            {
                switch (inline)
                {
                    case ImageInline imageInline:
                        using (var imageStream = new MemoryStream(imageInline.Image.ImageSource.Data))
                        {
                            targetBlock.InsertImage(new Telerik.Windows.Documents.Fixed.Model.Resources.ImageSource(imageStream));
                        }
                        break;

                    case Run textRun:
                        targetBlock.InsertText(textRun.Text);
                        break;

                    case Break _:
                        targetBlock.InsertLineBreak();
                        break;

                    // Any other inline type is intentionally ignored.
                }
            }
        }
    }
}
156+
```
157+
158+
## See Also
159+
* [Table]({%slug radpdfprocessing-editing-table-overview%})
160+
* [TableRow]({%slug radpdfprocessing-editing-table-tablerow%})
161+
* [TableCell]({%slug radpdfprocessing-editing-table-tablecell%})

0 commit comments

Comments
 (0)