Skip to content

Commit 7fc5706

Browse files
authored
Merge pull request #20327 from edoardocavazza/collect-list-table-children
Collect all child nodes of lists and tables in StructTree
2 parents de7179f + a932a06 commit 7fc5706

File tree

4 files changed

+46
-0
lines changed

4 files changed

+46
-0
lines changed

src/core/struct_tree.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,23 @@ class StructTreePage {
824824

825825
const element = new StructElementNode(this, dict);
826826
map.set(dict, element);
827+
switch (element.role) {
828+
case "L":
829+
case "LBody":
830+
case "LI":
831+
case "Table":
832+
case "THead":
833+
case "TBody":
834+
case "TFoot":
835+
case "TR": {
836+
// Always collect all child nodes of lists and tables, even empty ones
837+
for (const kid of element.kids) {
838+
if (kid.type === StructElementType.ELEMENT) {
839+
this.addNode(kid.dict, map, level - 1);
840+
}
841+
}
842+
}
843+
}
827844

828845
const parent = dict.get("P");
829846

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,7 @@
620620
!autoprint.pdf
621621
!bug1811694.pdf
622622
!bug1811510.pdf
623+
!issue20324.pdf
623624
!bug1815476.pdf
624625
!issue16021.pdf
625626
!bug1770750.pdf

test/pdfs/issue20324.pdf

10.9 KB
Binary file not shown.

test/unit/struct_tree_spec.js

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,34 @@ describe("struct tree", function () {
300300
},
301301
struct
302302
);
303+
});
304+
305+
it("should collect all list and table items in StructTree", async function () {
306+
/**
 * Collect, in pre-order, every node of a tree that satisfies `check`.
 *
 * All matches are pushed into one accumulator shared by the whole
 * traversal. Building a separate array per subtree and merging it with
 * `push(...subtreeResults)` re-copies every match once per ancestor level
 * and can throw a RangeError when a subtree yields more matches than the
 * engine accepts as call arguments.
 *
 * @param {Object} node - Root of the (sub)tree to search. If it has a
 *   truthy `children` property, that property is iterated recursively.
 * @param {function(Object): boolean} check - Predicate called once per
 *   visited node; nodes for which it returns a truthy value are collected.
 * @returns {Array<Object>} The matching nodes, parents before children and
 *   siblings in their original order.
 */
const findNodes = (node, check) => {
  const results = [];
  const visit = current => {
    if (check(current)) {
      results.push(current);
    }
    if (current.children) {
      for (const child of current.children) {
        visit(child);
      }
    }
  };
  visit(node);
  return results;
};
318+
const loadingTask = getDocument(buildGetDocumentParams("issue20324.pdf"));
319+
320+
const pdfDoc = await loadingTask.promise;
321+
const page = await pdfDoc.getPage(1);
322+
const tree = await page.getStructTree({
323+
includeMarkedContent: true,
324+
});
325+
const cells = findNodes(tree, node => node.role === "TD");
326+
expect(cells.length).toEqual(4);
327+
328+
const listItems = findNodes(tree, node => node.role === "LI");
329+
expect(listItems.length).toEqual(4);
330+
303331
await loadingTask.destroy();
304332
});
305333
});

0 commit comments

Comments
 (0)