diff --git a/src/core/catalog.js b/src/core/catalog.js index 9dd20e0e73be2..1ee2166bfcacf 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -122,6 +122,8 @@ function fetchRemoteDest(action) { return null; } +const MIN_PAGES_TO_ASSUME_ALL_TOP_LEVEL = 20; + class Catalog { constructor(pdfManager, xref) { this.pdfManager = pdfManager; @@ -1190,6 +1192,14 @@ class Catalog { pageDictCache = this.pageDictCache; let currentPageIndex = 0; + const getIsPage = async obj => { + let type = obj.getRaw("Type"); + if (type instanceof Ref) { + type = await xref.fetchAsync(type); + } + return isName(type, "Page") || !obj.has("Kids"); + }; + while (nodesToVisit.length) { const currentNode = nodesToVisit.pop(); @@ -1209,11 +1219,8 @@ class Catalog { const obj = await (pageDictCache.get(currentNode) || xref.fetchAsync(currentNode)); if (obj instanceof Dict) { - let type = obj.getRaw("Type"); - if (type instanceof Ref) { - type = await xref.fetchAsync(type); - } - if (isName(type, "Page") || !obj.has("Kids")) { + const isPage = await getIsPage(obj); + if (isPage) { // Cache the Page reference, since it can *greatly* improve // performance by reducing redundant lookups in long documents // where all nodes are found at *one* level of the tree. @@ -1288,6 +1295,22 @@ class Catalog { // Always check all `Kids` nodes, to avoid getting stuck in an empty // node further down in the tree (see issue5644.pdf, issue8088.pdf), // and to ensure that we actually find the correct `Page` dict. + + // EXCEPT if it looks like this is likely an "everything in the top level + // /Pages" PDF and there's enough pages to be worth not reading everything + // See if the n'th item is a page + if ( + currentNode === this.toplevelPagesDict && + count >= MIN_PAGES_TO_ASSUME_ALL_TOP_LEVEL && + count === kids.length + ) { + const maybePage = await xref.fetchAsync(kids[pageIndex]); + const isPage = await getIsPage(maybePage); + if (isPage) { + return [maybePage, kids[pageIndex]]; + } + } + for (let last = kids.length - 1; last >= 0; last--) { const lastKid = kids[last]; nodesToVisit.push(lastKid);