Skip to content

Commit bc102b6

Browse files
authored
perf: reduce allocations and copies in load-modify-save path (#34)
We were only ~1.5x faster than pdf-lib for load → modify → save, which is underwhelming given our architectural advantages. Profiling with bun --cpu-prof showed the bottleneck was allocation churn and unnecessary copying, not parsing or serialization logic.

Key changes:
- Pre-size ByteWriter buffers using size hints (original PDF length for full saves, estimated output sizes for filters/serializers) to avoid repeated geometric reallocation.
- Use subarray instead of slice for stream data in the parser — these are zero-copy views into the original PDF bytes, which stay alive for the document lifetime anyway.
- Return the internal buffer directly from ByteWriter.toBytes() when it's already the right size (zero-copy fast path); fall back to subarray instead of slice for the trimmed case.
- Hoist the trailing-zero regex in formatPdfNumber out of the function body so it isn't recompiled on every call.
- Route page tree loading through registry.resolve so objects are tracked for modification detection (it was using parsed.getObject, which bypassed the registry).
1 parent 2e8ab77 commit bc102b6

File tree

11 files changed

+109
-22
lines changed

11 files changed

+109
-22
lines changed

scripts/profile-load-save.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/**
2+
* CPU profiling script for load-modify-save workflow.
3+
*
4+
* Usage: bun --cpu-prof-md scripts/profile-load-save.ts
5+
*
6+
* Runs the load → modify → save cycle multiple times to get
7+
* a representative CPU profile showing where time is spent.
8+
*/
9+
10+
import { readFileSync } from "node:fs";
11+
12+
import { PDF } from "../src/index.ts";
13+
14+
const HEAVY_PDF = "fixtures/benchmarks/cc-journalists-guide.pdf";
15+
const ITERATIONS = 20;
16+
17+
const pdfBytes = new Uint8Array(readFileSync(HEAVY_PDF));
18+
console.log(`PDF size: ${(pdfBytes.length / 1024 / 1024).toFixed(1)}MB`);
19+
20+
// Warm up
21+
{
22+
const pdf = await PDF.load(pdfBytes);
23+
const page = pdf.getPage(0)!;
24+
page.drawRectangle({ x: 50, y: 50, width: 100, height: 100 });
25+
await pdf.save();
26+
}
27+
28+
console.log(`Running ${ITERATIONS} iterations of load → modify → save...`);
29+
30+
const start = performance.now();
31+
32+
for (let i = 0; i < ITERATIONS; i++) {
33+
const pdf = await PDF.load(pdfBytes);
34+
const page = pdf.getPage(0)!;
35+
page.drawRectangle({ x: 50, y: 50, width: 100, height: 100 });
36+
await pdf.save();
37+
}
38+
39+
const elapsed = performance.now() - start;
40+
console.log(`Total: ${elapsed.toFixed(0)}ms`);
41+
console.log(`Average: ${(elapsed / ITERATIONS).toFixed(1)}ms per iteration`);

src/api/pdf.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -429,8 +429,10 @@ export class PDF {
429429

430430
const pdfCatalog = new PDFCatalog(catalogDict, registry);
431431
const pagesRef = catalogDict.getRef("Pages");
432+
// Use registry.resolve so page tree objects are tracked for
433+
// modification detection and reachability analysis during save.
432434
const pages = pagesRef
433-
? PDFPageTree.load(pagesRef, parsed.getObject.bind(parsed))
435+
? PDFPageTree.load(pagesRef, registry.resolve.bind(registry))
434436
: PDFPageTree.empty();
435437

436438
// Load Info dictionary if present (for metadata access)
@@ -526,7 +528,7 @@ export class PDF {
526528
const pdfCatalog = new PDFCatalog(catalogDict, registry);
527529
const pagesRef = catalogDict.getRef("Pages");
528530
const pages = pagesRef
529-
? PDFPageTree.load(pagesRef, parsed.getObject.bind(parsed))
531+
? PDFPageTree.load(pagesRef, registry.resolve.bind(registry))
530532
: PDFPageTree.empty();
531533

532534
// Load Info dictionary if present (for metadata change tracking)
@@ -3171,7 +3173,7 @@ export class PDF {
31713173
return result;
31723174
}
31733175

3174-
// Full save (collectReachableRefs in writeComplete will load all reachable objects)
3176+
// Full save — write all objects in a single pass.
31753177
const result = writeComplete(this.ctx.registry, {
31763178
version: this.ctx.info.version,
31773179
root,
@@ -3182,6 +3184,8 @@ export class PDF {
31823184
securityHandler,
31833185
compressStreams: options.compressStreams,
31843186
compressionThreshold: options.compressionThreshold,
3187+
// Pre-size output buffer to avoid repeated doubling for large PDFs.
3188+
sizeHint: this.originalBytes.length > 0 ? this.originalBytes.length : undefined,
31853189
});
31863190

31873191
// Reset pending security state after successful save

src/filters/ascii-hex-filter.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ export class ASCIIHexFilter implements Filter {
2222
private static readonly NIBBLE_MASK = 0x0f;
2323

2424
decode(data: Uint8Array, _params?: PdfDict): Uint8Array {
25-
const output = new ByteWriter();
25+
const output = new ByteWriter(undefined, {
26+
initialSize: Math.ceil(data.length / 2), // Hex is 2 chars per byte
27+
});
2628

2729
let high: number | null = null;
2830

src/filters/ascii85-filter.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ export class ASCII85Filter implements Filter {
2525
private static readonly ZERO_SHORTCUT = 0x7a;
2626

2727
decode(data: Uint8Array, _params?: PdfDict): Uint8Array {
28-
const output = new ByteWriter();
28+
const output = new ByteWriter(undefined, {
29+
initialSize: Math.ceil((data.length * 4) / 5), // Estimate output size
30+
});
2931

3032
let buffer = 0;
3133
let count = 0;
@@ -102,7 +104,9 @@ export class ASCII85Filter implements Filter {
102104
}
103105

104106
encode(data: Uint8Array, _params?: PdfDict): Uint8Array {
105-
const output = new ByteWriter();
107+
const output = new ByteWriter(undefined, {
108+
initialSize: Math.ceil((data.length * 5) / 4) + 2, // Estimate output size + end marker
109+
});
106110

107111
// Process 4 bytes at a time
108112
let i = 0;

src/filters/lzw-filter.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ export class LZWFilter implements Filter {
4343
}
4444

4545
private lzwDecode(data: Uint8Array, earlyChange: number): Uint8Array {
46-
const output = new ByteWriter();
46+
const output = new ByteWriter(undefined, {
47+
initialSize: data.length * 4, // Initial estimate only; LZW output can exceed 4x the input
48+
});
4749

4850
// LZW constants
4951
// Bit reading state

src/filters/run-length-filter.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ export class RunLengthFilter implements Filter {
1919
readonly name = "RunLengthDecode";
2020

2121
decode(data: Uint8Array, _params?: PdfDict): Uint8Array {
22-
const output = new ByteWriter();
22+
const output = new ByteWriter(undefined, {
23+
initialSize: data.length * 4, // Initial estimate only; a 2-byte run can decode to as many as 128 bytes
24+
});
2325
let i = 0;
2426

2527
while (i < data.length) {
@@ -52,7 +54,10 @@ export class RunLengthFilter implements Filter {
5254
}
5355

5456
encode(data: Uint8Array, _params?: PdfDict): Uint8Array {
55-
const output = new ByteWriter();
57+
const output = new ByteWriter(undefined, {
58+
initialSize: data.length * 2, // Worst case (no runs)
59+
});
60+
5661
let i = 0;
5762

5863
while (i < data.length) {

src/helpers/format.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ export function parsePdfDate(str: string): Date | undefined {
9696
// Number Formatting
9797
// ─────────────────────────────────────────────────────────────────────────────
9898

99+
const TRAILING_ZERO_REGEX = /\.?0+$/;
100+
99101
/**
100102
* Format a number for PDF output.
101103
*
@@ -112,7 +114,7 @@ export function formatPdfNumber(value: number): string {
112114
let str = value.toFixed(5);
113115

114116
// Remove trailing zeros and unnecessary decimal point
115-
str = str.replace(/\.?0+$/, "");
117+
str = str.replace(TRAILING_ZERO_REGEX, "");
116118

117119
// Handle edge case where we stripped everything after decimal
118120
if (str === "" || str === "-") {

src/io/byte-writer.ts

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,19 @@ export class ByteWriter {
2525
* @param options - Configuration options
2626
*/
2727
constructor(existingBytes?: Uint8Array, options: ByteWriterOptions = {}) {
28-
const initialSize = options.initialSize ?? 65536;
2928
this.maxSize = options.maxSize ?? Number.MAX_SAFE_INTEGER;
3029

3130
if (existingBytes) {
32-
// Start with existing bytes, leave room to grow
33-
const size = Math.max(existingBytes.length * 2, initialSize);
34-
this.buffer = new Uint8Array(size);
31+
// When initialSize is provided, use it as the buffer capacity (clamped below so
32+
// it is never smaller than the existing bytes). Otherwise default to 2x the existing bytes.
33+
const size = options.initialSize ?? existingBytes.length * 2;
34+
35+
this.buffer = new Uint8Array(Math.max(size, existingBytes.length));
3536
this.buffer.set(existingBytes);
37+
3638
this.offset = existingBytes.length;
3739
} else {
38-
this.buffer = new Uint8Array(initialSize);
40+
this.buffer = new Uint8Array(options.initialSize ?? 65536);
3941
}
4042
}
4143

@@ -105,11 +107,18 @@ export class ByteWriter {
105107

106108
/**
107109
* Get final bytes.
108-
* Returns a copy (slice) so the internal buffer can be garbage collected.
110+
*
111+
* If the internal buffer is exactly the right size, returns it directly
112+
* (zero-copy). Otherwise returns a trimmed view (subarray) that shares the
113+
* internal buffer, so the full buffer stays alive for as long as the view does.
109114
*
110115
* Note: ByteWriter is single-use. Do not write after calling toBytes().
111116
*/
112117
toBytes(): Uint8Array {
113-
return this.buffer.slice(0, this.offset);
118+
if (this.offset === this.buffer.length) {
119+
return this.buffer;
120+
}
121+
122+
return this.buffer.subarray(0, this.offset);
114123
}
115124
}

src/parser/indirect-object-parser.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,12 @@ export class IndirectObjectParser {
132132
// Get the stream length
133133
const length = this.resolveLength(dict);
134134

135-
// Read exactly `length` bytes
135+
// Read exactly `length` bytes.
136+
// Use subarray (zero-copy view) since the underlying PDF bytes
137+
// are kept alive by the PDF object for the document's lifetime.
136138
const startPos = this.scanner.position;
137-
const data = this.scanner.bytes.slice(startPos, startPos + length);
139+
const data = this.scanner.bytes.subarray(startPos, startPos + length);
140+
138141
this.scanner.moveTo(startPos + length);
139142

140143
// Skip optional EOL before "endstream"

src/writer/pdf-writer.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,14 @@ export interface WriteOptions {
7373
* The encrypt dictionary reference must also be provided.
7474
*/
7575
securityHandler?: StandardSecurityHandler;
76+
77+
/**
78+
* Hint for the final PDF size in bytes.
79+
*
80+
* When provided, the ByteWriter will pre-allocate a buffer of this size,
81+
* reducing the need for reallocations during writing.
82+
*/
83+
sizeHint?: number;
7684
}
7785

7886
/**
@@ -341,7 +349,10 @@ function collectReachableRefs(
341349
* ```
342350
*/
343351
export function writeComplete(registry: ObjectRegistry, options: WriteOptions): WriteResult {
344-
const writer = new ByteWriter();
352+
const writer = new ByteWriter(undefined, {
353+
initialSize: options.sizeHint,
354+
});
355+
345356
const compress = options.compressStreams ?? true;
346357
const threshold = options.compressionThreshold ?? DEFAULT_COMPRESSION_THRESHOLD;
347358

0 commit comments

Comments
 (0)