perf: reduce allocations and copies in load-modify-save path (#34)

Mythie · web-flow · commit bc102b6945c0 · 2026-02-23T14:21:36.000+11:00
We were only ~1.5x faster than pdf-lib for load → modify → save,
which is underwhelming given our architectural advantages. Profiling
with bun --cpu-prof showed the bottleneck was allocation churn and
unnecessary copying, not parsing or serialization logic.

Key changes:
- Pre-size ByteWriter buffers using size hints (original PDF length
  for full saves, estimated output sizes for filters/serializers)
  to avoid repeated geometric reallocation
- Use subarray instead of slice for stream data in the parser —
  these are zero-copy views into the original PDF bytes which stay
  alive for the document lifetime anyway
- Return the internal buffer directly from ByteWriter.toBytes()
  when it's already the right size (zero-copy fast path), fall back
  to subarray instead of slice for the trimmed case
- Hoist the trailing-zero regex in formatPdfNumber out of the
  function body so it isn't recompiled on every call
- Route page tree loading through registry.resolve so objects are
  tracked for modification detection (was using parsed.getObject
  which bypassed the registry)
diff --git a/scripts/profile-load-save.ts b/scripts/profile-load-save.ts
@@ -0,0 +1,41 @@
+/**
+ * CPU profiling script for load-modify-save workflow.
+ *
+ * Usage: bun --cpu-prof-md scripts/profile-load-save.ts
+ *
+ * Runs the load → modify → save cycle multiple times to get
+ * a representative CPU profile showing where time is spent.
+ */
+
+import { readFileSync } from "node:fs";
+
+import { PDF } from "../src/index.ts";
+
+const HEAVY_PDF = "fixtures/benchmarks/cc-journalists-guide.pdf";
+const ITERATIONS = 20;
+
+const pdfBytes = new Uint8Array(readFileSync(HEAVY_PDF));
+console.log(`PDF size: ${(pdfBytes.length / 1024 / 1024).toFixed(1)}MB`);
+
+// Warm up
+{
+  const pdf = await PDF.load(pdfBytes);
+  const page = pdf.getPage(0)!;
+  page.drawRectangle({ x: 50, y: 50, width: 100, height: 100 });
+  await pdf.save();
+}
+
+console.log(`Running ${ITERATIONS} iterations of load → modify → save...`);
+
+const start = performance.now();
+
+for (let i = 0; i < ITERATIONS; i++) {
+  const pdf = await PDF.load(pdfBytes);
+  const page = pdf.getPage(0)!;
+  page.drawRectangle({ x: 50, y: 50, width: 100, height: 100 });
+  await pdf.save();
+}
+
+const elapsed = performance.now() - start;
+console.log(`Total: ${elapsed.toFixed(0)}ms`);
+console.log(`Average: ${(elapsed / ITERATIONS).toFixed(1)}ms per iteration`);
diff --git a/src/api/pdf.ts b/src/api/pdf.ts
@@ -429,8 +429,10 @@ export class PDF {
 
     const pdfCatalog = new PDFCatalog(catalogDict, registry);
     const pagesRef = catalogDict.getRef("Pages");
+    // Use registry.resolve so page tree objects are tracked for
+    // modification detection and reachability analysis during save.
     const pages = pagesRef
-      ? PDFPageTree.load(pagesRef, parsed.getObject.bind(parsed))
+      ? PDFPageTree.load(pagesRef, registry.resolve.bind(registry))
       : PDFPageTree.empty();
 
     // Load Info dictionary if present (for metadata access)
@@ -526,7 +528,7 @@ export class PDF {
     const pdfCatalog = new PDFCatalog(catalogDict, registry);
     const pagesRef = catalogDict.getRef("Pages");
     const pages = pagesRef
-      ? PDFPageTree.load(pagesRef, parsed.getObject.bind(parsed))
+      ? PDFPageTree.load(pagesRef, registry.resolve.bind(registry))
       : PDFPageTree.empty();
 
     // Load Info dictionary if present (for metadata change tracking)
@@ -3171,7 +3173,7 @@ export class PDF {
       return result;
     }
 
-    // Full save (collectReachableRefs in writeComplete will load all reachable objects)
+    // Full save — write all objects in a single pass.
     const result = writeComplete(this.ctx.registry, {
       version: this.ctx.info.version,
       root,
@@ -3182,6 +3184,8 @@ export class PDF {
       securityHandler,
       compressStreams: options.compressStreams,
       compressionThreshold: options.compressionThreshold,
+      // Pre-size output buffer to avoid repeated doubling for large PDFs.
+      sizeHint: this.originalBytes.length > 0 ? this.originalBytes.length : undefined,
     });
 
     // Reset pending security state after successful save
diff --git a/src/filters/ascii-hex-filter.ts b/src/filters/ascii-hex-filter.ts
@@ -22,7 +22,9 @@ export class ASCIIHexFilter implements Filter {
   private static readonly NIBBLE_MASK = 0x0f;
 
   decode(data: Uint8Array, _params?: PdfDict): Uint8Array {
-    const output = new ByteWriter();
+    const output = new ByteWriter(undefined, {
+      initialSize: Math.ceil(data.length / 2), // Hex is 2 chars per byte
+    });
 
     let high: number | null = null;
 
diff --git a/src/filters/ascii85-filter.ts b/src/filters/ascii85-filter.ts
@@ -25,7 +25,9 @@ export class ASCII85Filter implements Filter {
   private static readonly ZERO_SHORTCUT = 0x7a;
 
   decode(data: Uint8Array, _params?: PdfDict): Uint8Array {
-    const output = new ByteWriter();
+    const output = new ByteWriter(undefined, {
+      initialSize: Math.ceil((data.length * 4) / 5), // Estimate output size
+    });
 
     let buffer = 0;
     let count = 0;
@@ -102,7 +104,9 @@ export class ASCII85Filter implements Filter {
   }
 
   encode(data: Uint8Array, _params?: PdfDict): Uint8Array {
-    const output = new ByteWriter();
+    const output = new ByteWriter(undefined, {
+      initialSize: Math.ceil((data.length * 5) / 4) + 2, // Estimate output size + end marker
+    });
 
     // Process 4 bytes at a time
     let i = 0;
diff --git a/src/filters/lzw-filter.ts b/src/filters/lzw-filter.ts
@@ -43,7 +43,9 @@ export class LZWFilter implements Filter {
   }
 
   private lzwDecode(data: Uint8Array, earlyChange: number): Uint8Array {
-    const output = new ByteWriter();
+    const output = new ByteWriter(undefined, {
+      initialSize: data.length * 4, // Size estimate only; LZW output can exceed 4x the input
+    });
 
     // LZW constants
     // Bit reading state
diff --git a/src/filters/run-length-filter.ts b/src/filters/run-length-filter.ts
@@ -19,7 +19,9 @@ export class RunLengthFilter implements Filter {
   readonly name = "RunLengthDecode";
 
   decode(data: Uint8Array, _params?: PdfDict): Uint8Array {
-    const output = new ByteWriter();
+    const output = new ByteWriter(undefined, {
+      initialSize: data.length * 4, // Size estimate only; a 2-byte run can decode to 128 bytes
+    });
     let i = 0;
 
     while (i < data.length) {
@@ -52,7 +54,10 @@ export class RunLengthFilter implements Filter {
   }
 
   encode(data: Uint8Array, _params?: PdfDict): Uint8Array {
-    const output = new ByteWriter();
+    const output = new ByteWriter(undefined, {
+      initialSize: data.length * 2, // Worst case (no runs)
+    });
+
     let i = 0;
 
     while (i < data.length) {
diff --git a/src/helpers/format.ts b/src/helpers/format.ts
@@ -96,6 +96,8 @@ export function parsePdfDate(str: string): Date | undefined {
 // Number Formatting
 // ─────────────────────────────────────────────────────────────────────────────
 
+const TRAILING_ZERO_REGEX = /\.?0+$/;
+
 /**
  * Format a number for PDF output.
  *
@@ -112,7 +114,7 @@ export function formatPdfNumber(value: number): string {
   let str = value.toFixed(5);
 
   // Remove trailing zeros and unnecessary decimal point
-  str = str.replace(/\.?0+$/, "");
+  str = str.replace(TRAILING_ZERO_REGEX, "");
 
   // Handle edge case where we stripped everything after decimal
   if (str === "" || str === "-") {
diff --git a/src/io/byte-writer.ts b/src/io/byte-writer.ts
@@ -25,17 +25,19 @@ export class ByteWriter {
    * @param options - Configuration options
    */
   constructor(existingBytes?: Uint8Array, options: ByteWriterOptions = {}) {
-    const initialSize = options.initialSize ?? 65536;
     this.maxSize = options.maxSize ?? Number.MAX_SAFE_INTEGER;
 
     if (existingBytes) {
-      // Start with existing bytes, leave room to grow
-      const size = Math.max(existingBytes.length * 2, initialSize);
-      this.buffer = new Uint8Array(size);
+      // Use initialSize when provided (the caller knows the expected final size),
+      // otherwise 2x the existing bytes; never smaller than existingBytes itself.
+      const size = options.initialSize ?? existingBytes.length * 2;
+
+      this.buffer = new Uint8Array(Math.max(size, existingBytes.length));
       this.buffer.set(existingBytes);
+
       this.offset = existingBytes.length;
     } else {
-      this.buffer = new Uint8Array(initialSize);
+      this.buffer = new Uint8Array(options.initialSize ?? 65536);
     }
   }
 
@@ -105,11 +107,18 @@ export class ByteWriter {
 
   /**
    * Get final bytes.
-   * Returns a copy (slice) so the internal buffer can be garbage collected.
+   *
+   * If the internal buffer is exactly the right size, returns it directly
+   * (zero-copy). Otherwise returns a trimmed view (subarray) that shares
+   * the internal buffer, so the full allocation stays alive with the result.
    *
    * Note: ByteWriter is single-use. Do not write after calling toBytes().
    */
   toBytes(): Uint8Array {
-    return this.buffer.slice(0, this.offset);
+    if (this.offset === this.buffer.length) {
+      return this.buffer;
+    }
+
+    return this.buffer.subarray(0, this.offset);
   }
 }
diff --git a/src/parser/indirect-object-parser.ts b/src/parser/indirect-object-parser.ts
@@ -132,9 +132,12 @@ export class IndirectObjectParser {
     // Get the stream length
     const length = this.resolveLength(dict);
 
-    // Read exactly `length` bytes
+    // Read exactly `length` bytes.
+    // Use subarray (zero-copy view) since the underlying PDF bytes
+    // are kept alive by the PDF object for the document's lifetime.
     const startPos = this.scanner.position;
-    const data = this.scanner.bytes.slice(startPos, startPos + length);
+    const data = this.scanner.bytes.subarray(startPos, startPos + length);
+
     this.scanner.moveTo(startPos + length);
 
     // Skip optional EOL before "endstream"
diff --git a/src/writer/pdf-writer.ts b/src/writer/pdf-writer.ts
@@ -73,6 +73,14 @@ export interface WriteOptions {
    * The encrypt dictionary reference must also be provided.
    */
   securityHandler?: StandardSecurityHandler;
+
+  /**
+   * Hint for the final PDF size in bytes.
+   *
+   * When provided, the ByteWriter will pre-allocate a buffer of this size,
+   * reducing the need for reallocations during writing.
+   */
+  sizeHint?: number;
 }
 
 /**
@@ -341,7 +349,10 @@ function collectReachableRefs(
  * ```
  */
 export function writeComplete(registry: ObjectRegistry, options: WriteOptions): WriteResult {
-  const writer = new ByteWriter();
+  const writer = new ByteWriter(undefined, {
+    initialSize: options.sizeHint,
+  });
+
   const compress = options.compressStreams ?? true;
   const threshold = options.compressionThreshold ?? DEFAULT_COMPRESSION_THRESHOLD;
 
diff --git a/src/writer/serializer.ts b/src/writer/serializer.ts
@@ -18,7 +18,9 @@ import type { PdfRef } from "#src/objects/pdf-ref";
  * @returns The PDF byte representation
  */
 export function serializeObject(obj: PdfObject): Uint8Array {
-  const writer = new ByteWriter();
+  const writer = new ByteWriter(undefined, {
+    initialSize: 256, // Start with a reasonable buffer size
+  });
 
   // All PdfObject types implement PdfPrimitive
   obj.toBytes(writer);
@@ -36,7 +38,9 @@ export function serializeObject(obj: PdfObject): Uint8Array {
  * @returns The complete indirect object definition
  */
 export function serializeIndirectObject(ref: PdfRef, obj: PdfObject): Uint8Array {
-  const writer = new ByteWriter();
+  const writer = new ByteWriter(undefined, {
+    initialSize: 256, // Start with a reasonable buffer size
+  });
 
   writer.writeAscii(`${ref.objectNumber} ${ref.generation} obj\n`);
   obj.toBytes(writer);

Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,9 @@ export class LZWFilter implements Filter {`
`43`	`43`	`}`
`44`	`44`
`45`	`45`	`private lzwDecode(data: Uint8Array, earlyChange: number): Uint8Array {`
`46`		`- const output = new ByteWriter();`
	`46`	`+ const output = new ByteWriter(undefined, {`
	`47`	`+ initialSize: data.length * 4, // Size estimate only; LZW output can exceed 4x the input`
	`48`	`+ });`
`47`	`49`
`48`	`50`	`// LZW constants`
`49`	`51`	`// Bit reading state`