Skip to content

Commit 04644ee

Browse files
authored
fix(api): resolve indirect refs in resource subdictionaries (#32)
addXObjectResource and registerResource called resources.get() without a resolver. When a resource subdictionary (e.g. /XObject) was stored as an indirect PdfRef, get() returned the unresolved PdfRef, which failed the instanceof PdfDict check, so both methods silently replaced the existing dict with a new, empty one. This dropped all pre-existing resource entries. Scanner-produced PDFs (e.g. Konica Minolta) commonly store /Resources /XObject as an indirect object, so embedPage + drawPage on those PDFs would erase the original page content entirely. addFontResource already passed the resolver correctly; this change aligns the other two methods with it.
1 parent 65ecf18 commit 04644ee

File tree

3 files changed

+146
-2
lines changed

3 files changed

+146
-2
lines changed
1.46 KB
Binary file not shown.

src/api/pdf-page.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2360,7 +2360,7 @@ export class PDFPage {
23602360
*/
23612361
private addXObjectResource(ref: PdfRef): string {
23622362
const resources = this.getResources();
2363-
let xobjects = resources.get("XObject");
2363+
let xobjects = resources.get("XObject", this.ctx.resolve.bind(this.ctx));
23642364

23652365
if (!(xobjects instanceof PdfDict)) {
23662366
xobjects = new PdfDict();
@@ -2409,7 +2409,7 @@ export class PDFPage {
24092409

24102410
// Get or create the resource subdictionary
24112411
const resources = this.getResources();
2412-
let subdict = resources.get(resourceType);
2412+
let subdict = resources.get(resourceType, this.ctx.resolve.bind(this.ctx));
24132413

24142414
if (!(subdict instanceof PdfDict)) {
24152415
subdict = new PdfDict();
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/**
2+
* Regression test: drawPage / embedPage loses existing page content
3+
* when the page's Resources/XObject subdictionary is an indirect reference.
4+
*
5+
* The bug: `addXObjectResource` and `registerResource` in PDFPage called
6+
* `resources.get("XObject")` without passing a resolver. When the XObject
7+
* entry was a PdfRef (indirect object), the check `!(xobjects instanceof PdfDict)`
8+
* evaluated to true, causing the method to create a brand-new empty PdfDict
9+
* and overwrite the existing XObject dictionary. This silently dropped all
10+
* pre-existing XObject entries (images, form XObjects, etc.), making the
11+
* original page content invisible.
12+
*
13+
* PDFs produced by scanners (e.g. Konica Minolta) commonly use this structure:
14+
* the page content is a single `Do` operator referencing an Image XObject,
15+
* and the Resources/XObject dict is stored as an indirect object.
16+
*
17+
* The fixture `scenarios/indirect-xobject-resources.pdf` has a page whose
18+
* /Resources /XObject is an indirect PdfRef pointing to a dict with an
19+
* existing Form XObject (`Fm0`) that renders the visible page content.
20+
*/
21+
22+
import { PDF } from "#src/api/pdf";
23+
import { PdfDict } from "#src/objects/pdf-dict";
24+
import { PdfRef } from "#src/objects/pdf-ref";
25+
import { loadFixture, saveTestOutput } from "#src/test-utils";
26+
import { describe, expect, it } from "vitest";
27+
28+
describe("drawPage with indirect XObject resources", () => {
29+
it("preserves existing XObject entries when Resources/XObject is an indirect ref", async () => {
30+
const pdfBytes = await loadFixture("scenarios", "indirect-xobject-resources.pdf");
31+
const pdf = await PDF.load(pdfBytes);
32+
const page = pdf.getPage(0)!;
33+
34+
const resolve = (ref: PdfRef) => pdf.getObject(ref);
35+
const resources = page.getResources();
36+
37+
// Precondition: the XObject subdict is an indirect reference
38+
const xobjectsRaw = resources.get("XObject");
39+
expect(xobjectsRaw).toBeInstanceOf(PdfRef);
40+
41+
// Precondition: it resolves to a dict containing the original XObject
42+
const xobjectsBefore = resources.get("XObject", resolve) as PdfDict;
43+
expect(xobjectsBefore).toBeInstanceOf(PdfDict);
44+
45+
const originalKeys = [...xobjectsBefore.keys()].map(k => k.value);
46+
expect(originalKeys).toContain("Fm0");
47+
48+
// embedPage + drawPage (the Documenso overlay pattern)
49+
const overlayPdf = PDF.create();
50+
const overlayPage = overlayPdf.addPage({ width: page.width, height: page.height });
51+
52+
overlayPage.drawText("OVERLAY", {
53+
x: 100,
54+
y: 100,
55+
size: 20,
56+
color: { type: "RGB", red: 0, green: 0, blue: 0 },
57+
});
58+
59+
const overlayDoc = await PDF.load(await overlayPdf.save());
60+
const embedded = await pdf.embedPage(overlayDoc, 0);
61+
62+
page.drawPage(embedded, { x: 0, y: 0 });
63+
64+
// The XObject dict must still contain the original entry
65+
const xobjectsAfter = resources.get("XObject", resolve) as PdfDict;
66+
67+
expect(xobjectsAfter).toBeInstanceOf(PdfDict);
68+
expect(xobjectsAfter.has("Fm0")).toBe(true);
69+
70+
// And also the newly added form XObject (expected to be Fm1 since Fm0 is taken; only the entry count is asserted here)
71+
const afterKeys = [...xobjectsAfter.keys()].map(k => k.value);
72+
expect(afterKeys.length).toBeGreaterThan(originalKeys.length);
73+
74+
// Save, reload, verify content survives round-trip
75+
const savedBytes = await pdf.save({ useXRefStream: true });
76+
77+
await saveTestOutput("issues/drawpage-indirect-xobject.pdf", savedBytes);
78+
79+
const reloaded = await PDF.load(savedBytes);
80+
const reloadedPage = reloaded.getPage(0)!;
81+
const reloadedResolve = (ref: PdfRef) => reloaded.getObject(ref);
82+
const reloadedXObjects = reloadedPage.getResources().get("XObject", reloadedResolve) as PdfDict;
83+
84+
expect(reloadedXObjects).toBeInstanceOf(PdfDict);
85+
86+
// Original XObject must still be there
87+
expect(reloadedXObjects.has("Fm0")).toBe(true);
88+
});
89+
90+
it("preserves content through the full flatten + embed + flatten flow", async () => {
91+
const pdfBytes = await loadFixture("scenarios", "indirect-xobject-resources.pdf");
92+
const pdf = await PDF.load(pdfBytes);
93+
const page = pdf.getPage(0)!;
94+
95+
// Step 1: flattenAll (like Documenso does before signing)
96+
pdf.flattenAll();
97+
98+
// Step 2: embed + draw overlay
99+
const overlayPdf = PDF.create();
100+
const overlayPage = overlayPdf.addPage({ width: page.width, height: page.height });
101+
102+
overlayPage.drawRectangle({
103+
x: 50,
104+
y: 50,
105+
width: 200,
106+
height: 40,
107+
color: { type: "RGB", red: 0.9, green: 0.9, blue: 1 },
108+
borderColor: { type: "RGB", red: 0, green: 0, blue: 0.5 },
109+
borderWidth: 1,
110+
});
111+
112+
const overlayDoc = await PDF.load(await overlayPdf.save());
113+
const embedded = await pdf.embedPage(overlayDoc, 0);
114+
115+
page.drawPage(embedded, { x: 0, y: 0 });
116+
117+
// Step 3: flattenAll again
118+
pdf.flattenAll();
119+
120+
// Step 4: Save with xref stream
121+
const savedBytes = await pdf.save({ useXRefStream: true });
122+
123+
await saveTestOutput("issues/drawpage-indirect-xobject-full-flow.pdf", savedBytes);
124+
125+
// The file should not have lost the original XObject data.
126+
// Before the fix, the file shrank dramatically because the original
127+
// Form XObject (the entire visible page content) was silently dropped.
128+
expect(savedBytes.length).toBeGreaterThan(pdfBytes.length * 0.5);
129+
130+
// Reload and verify original XObject is still present
131+
const reloaded = await PDF.load(savedBytes);
132+
133+
const reloadedPage = reloaded.getPage(0)!;
134+
const resolve = (ref: PdfRef) => reloaded.getObject(ref);
135+
const xobjects = reloadedPage.getResources().get("XObject", resolve) as PdfDict;
136+
137+
expect(xobjects).toBeInstanceOf(PdfDict);
138+
139+
const keys = [...xobjects.keys()].map(k => k.value);
140+
141+
expect(keys).toContain("Fm0");
142+
expect(keys.length).toBeGreaterThanOrEqual(2);
143+
});
144+
});

0 commit comments

Comments
 (0)