diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js index f8eafbc17b481..e31b06a17e6fe 100644 --- a/src/core/struct_tree.js +++ b/src/core/struct_tree.js @@ -754,6 +754,23 @@ class StructTreePage { const element = new StructElementNode(this, dict); map.set(dict, element); + switch (element.role) { + case "L": + case "LBody": + case "LI": + case "Table": + case "THead": + case "TBody": + case "TFoot": + case "TR": { + // Always collect all child nodes of lists and tables, even empty ones + for (const kid of element.kids) { + if (kid.type === StructElementType.ELEMENT) { + this.addNode(kid.dict, map, level - 1); + } + } + } + } const parent = dict.get("P"); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 6b1ede9aefd21..0a79b24a4c4cd 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -620,6 +620,7 @@ !autoprint.pdf !bug1811694.pdf !bug1811510.pdf +!issue20324.pdf !bug1815476.pdf !issue16021.pdf !bug1770750.pdf diff --git a/test/pdfs/issue20324.pdf b/test/pdfs/issue20324.pdf new file mode 100644 index 0000000000000..c5f2a1d4ffc8d Binary files /dev/null and b/test/pdfs/issue20324.pdf differ diff --git a/test/unit/struct_tree_spec.js b/test/unit/struct_tree_spec.js index 0551565597c79..8c9c1c2343bab 100644 --- a/test/unit/struct_tree_spec.js +++ b/test/unit/struct_tree_spec.js @@ -151,4 +151,33 @@ describe("struct tree", function () { ); await loadingTask.destroy(); }); + + it("should collect all list and table items in StructTree", async function () { + const findNodes = (node, check) => { + const results = []; + if (check(node)) { + results.push(node); + } + if (node.children) { + for (const child of node.children) { + results.push(...findNodes(child, check)); + } + } + return results; + }; + const loadingTask = getDocument(buildGetDocumentParams("issue20324.pdf")); + + const pdfDoc = await loadingTask.promise; + const page = await pdfDoc.getPage(1); + const tree = await page.getStructTree({ + includeMarkedContent: true, + }); + const cells = findNodes(tree, node => node.role === "TD"); + expect(cells.length).toEqual(4); + + const listItems = findNodes(tree, node => node.role === "LI"); + expect(listItems.length).toEqual(4); + + await loadingTask.destroy(); + }); });