Skip to content

Commit 11b077a

Browse files
fix: preserve object order for PDFs with incremental updates (#951)
This fixes a critical issue where PDFs using the Incremental Update feature would become corrupted after loading and saving with pdf-lib. ## Problem When loading and saving PDFs without adding new content, the library would always sort indirect objects by object number during enumeration. This broke PDFs with incremental updates because the XRef byte offsets became invalid when objects were reordered. ## Solution Introduce a 'needsReordering' flag in PDFContext that tracks whether new objects have been registered: - When register() is called to add NEW objects, needsReordering is set to true - enumerateIndirectObjects() only sorts objects when needsReordering is true - If only existing objects are modified (no new objects added), the original parsing order is preserved This ensures backward compatibility - PDFs with new content are still properly sorted, while PDFs with only modifications maintain their original structure. ## Changes - src/core/PDFContext.ts: Add needsReordering flag and conditional sorting - tests/core/PDFContext_ordering.spec.ts: Add unit and integration tests - assets/pdfs/with_incremental_updates.pdf: Test PDF file for the issue Fixes #951
1 parent 93dd36e commit 11b077a

File tree

3 files changed

+159
-3
lines changed

3 files changed

+159
-3
lines changed
5.63 MB
Binary file not shown.

src/core/PDFContext.ts

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,24 @@ class PDFContext {
6262
private pushGraphicsStateContentStreamRef?: PDFRef;
6363
private popGraphicsStateContentStreamRef?: PDFRef;
6464

65+
/**
66+
* Tracks whether new objects have been registered in this context.
67+
* When true, objects will be sorted by object number during enumeration.
68+
* When false, objects maintain their original order (important for PDFs
69+
* with incremental updates where reordering can cause corruption).
70+
*
71+
* @see https://github.com/Hopding/pdf-lib/issues/951
72+
*/
73+
private needsReordering: boolean;
74+
6575
private constructor() {
6676
this.largestObjectNumber = 0;
6777
this.header = PDFHeader.forVersion(1, 7);
6878
this.trailerInfo = {};
6979

7080
this.indirectObjects = new Map();
7181
this.rng = SimpleRNG.withSeed(1);
82+
this.needsReordering = false;
7283
}
7384

7485
assign(ref: PDFRef, object: PDFObject): void {
@@ -85,6 +96,7 @@ class PDFContext {
8596

8697
register(object: PDFObject): PDFRef {
8798
const ref = this.nextRef();
99+
this.needsReordering = true;
88100
this.assign(ref, object);
89101
return ref;
90102
}
@@ -178,10 +190,24 @@ class PDFContext {
178190
return undefined;
179191
}
180192

193+
/**
194+
* Returns all indirect objects in this context.
195+
*
196+
* If new objects have been registered (needsReordering=true), objects are
197+
* sorted by ascending object number to ensure proper XRef table generation.
198+
*
199+
* If no new objects have been registered (needsReordering=false), objects
200+
* maintain their original parsing order. This is crucial for PDFs with
201+
* incremental updates, where reordering objects can cause corruption.
202+
*
203+
* @see https://github.com/Hopding/pdf-lib/issues/951
204+
*/
181205
enumerateIndirectObjects(): [PDFRef, PDFObject][] {
182-
return Array.from(this.indirectObjects.entries()).sort(
183-
byAscendingObjectNumber,
184-
);
206+
const entries = Array.from(this.indirectObjects.entries());
207+
if (this.needsReordering) {
208+
return entries.sort(byAscendingObjectNumber);
209+
}
210+
return entries;
185211
}
186212

187213
obj(literal: null | undefined): typeof PDFNull;
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
import fs from 'fs';
2+
import { PDFDocument } from 'src/index';
3+
import PDFContext from 'src/core/PDFContext';
4+
import PDFRef from 'src/core/objects/PDFRef';
5+
6+
/**
7+
* Tests for Issue #951 - Corrupted PDF on save with incremental updates
8+
* @see https://github.com/Hopding/pdf-lib/issues/951
9+
*
10+
* The issue occurs when pdf-lib loads and saves PDFs that use the "Incremental
11+
* Update" feature of the PDF format. The previous implementation always sorted
12+
* objects by object number during enumeration, which corrupted PDFs by
13+
* invalidating the byte offsets in the XRef table.
14+
*
15+
* The fix tracks whether new objects have been registered, and only sorts
16+
* objects when necessary (when new objects are added that need to be
17+
* integrated into the object number sequence).
18+
*/
19+
describe('Issue #951 - Corrupted PDF on save with incremental updates', () => {
20+
describe('PDFContext.enumerateIndirectObjects ordering', () => {
21+
it('should NOT sort objects when no new objects are registered', () => {
22+
const context = PDFContext.create();
23+
24+
// Simulate loading objects from a PDF - assign directly without using register()
25+
// This mimics what the parser does when loading an existing PDF
26+
const obj1 = context.obj({ Type: 'Font' });
27+
const obj2 = context.obj({ Type: 'Page' });
28+
const obj3 = context.obj({ Type: 'Catalog' });
29+
30+
// PDFRef.of creates refs with specific object numbers
31+
// Assign in non-sequential order to test ordering behavior
32+
// In real parsing, objects may not come in sequential order
33+
context.assign(PDFRef.of(5), obj1);
34+
context.assign(PDFRef.of(2), obj2);
35+
context.assign(PDFRef.of(8), obj3);
36+
37+
// When NO new objects are registered via register(),
38+
// enumeration should preserve insertion order (not sorted)
39+
const entries = context.enumerateIndirectObjects();
40+
const objectNumbers = entries.map(([ref]) => ref.objectNumber);
41+
42+
// Should maintain insertion order: 5, 2, 8 (not sorted as 2, 5, 8)
43+
expect(objectNumbers).toEqual([5, 2, 8]);
44+
});
45+
46+
it('should sort objects when new objects ARE registered', () => {
47+
const context = PDFContext.create();
48+
49+
// Assign existing objects
50+
const obj1 = context.obj({ Type: 'Font' });
51+
const obj2 = context.obj({ Type: 'Page' });
52+
context.assign(PDFRef.of(5), obj1);
53+
context.assign(PDFRef.of(2), obj2);
54+
55+
// Register a NEW object (this triggers needsReordering = true)
56+
const newObj = context.obj({ Type: 'NewObject' });
57+
context.register(newObj);
58+
59+
// Now enumeration SHOULD sort by object number
60+
const entries = context.enumerateIndirectObjects();
61+
const objectNumbers = entries.map(([ref]) => ref.objectNumber);
62+
63+
64+
// Should be sorted: 2, 5, 6 (the new one gets objectNumber 6 since largestObjectNumber was 5)
65+
expect(objectNumbers).toEqual([2, 5, 6]);
66+
});
67+
});
68+
69+
describe('Integration tests with real PDFs', () => {
70+
const incrementalUpdatesPdfPath = 'assets/pdfs/with_incremental_updates.pdf';
71+
const normalPdfPath = 'assets/pdfs/normal.pdf';
72+
73+
const incrementalFileExists = fs.existsSync(incrementalUpdatesPdfPath);
74+
const normalFileExists = fs.existsSync(normalPdfPath);
75+
76+
(incrementalFileExists ? it : it.skip)(
77+
'should preserve PDF with incremental updates when loading and saving without modifications',
78+
async () => {
79+
const inputBytes = fs.readFileSync(incrementalUpdatesPdfPath);
80+
81+
// Load the PDF without making any modifications
82+
const pdfDoc = await PDFDocument.load(inputBytes);
83+
const pageCount = pdfDoc.getPageCount();
84+
85+
// Save the PDF
86+
const savedBytes = await pdfDoc.save();
87+
88+
// Verify we can reload the saved PDF
89+
const reloadedDoc = await PDFDocument.load(savedBytes);
90+
expect(reloadedDoc.getPageCount()).toBe(pageCount);
91+
}
92+
);
93+
94+
(incrementalFileExists ? it : it.skip)(
95+
'should preserve PDF with incremental updates when saving with useObjectStreams: false',
96+
async () => {
97+
const inputBytes = fs.readFileSync(incrementalUpdatesPdfPath);
98+
99+
const pdfDoc = await PDFDocument.load(inputBytes);
100+
const pageCount = pdfDoc.getPageCount();
101+
102+
// Save with useObjectStreams: false (as mentioned in the issue)
103+
const savedBytes = await pdfDoc.save({ useObjectStreams: false });
104+
105+
// Verify reload
106+
const reloadedDoc = await PDFDocument.load(savedBytes);
107+
expect(reloadedDoc.getPageCount()).toBe(pageCount);
108+
}
109+
);
110+
111+
(normalFileExists ? it : it.skip)(
112+
'should still correctly save PDFs when new content is added',
113+
async () => {
114+
const inputBytes = fs.readFileSync(normalPdfPath);
115+
116+
const pdfDoc = await PDFDocument.load(inputBytes);
117+
const originalPageCount = pdfDoc.getPageCount();
118+
119+
// Add a new page (this should trigger reordering)
120+
pdfDoc.addPage();
121+
122+
const savedBytes = await pdfDoc.save();
123+
124+
// Verify the new page was added correctly
125+
const reloadedDoc = await PDFDocument.load(savedBytes);
126+
expect(reloadedDoc.getPageCount()).toBe(originalPageCount + 1);
127+
}
128+
);
129+
});
130+
});

0 commit comments

Comments
 (0)