|
| 1 | +/** |
| 2 | + * Memory verification script for JSDOM cleanup fixes |
| 3 | + * |
| 4 | + * Run with: bun run scripts/verify-memory-fixes.ts |
| 5 | + * |
| 6 | + * This script verifies that: |
| 7 | + * 1. JSDOM window.close() actually frees memory |
| 8 | + * 2. The clipper function doesn't leak memory across multiple calls |
| 9 | + * 3. Memory stays stable under repeated scraping |
| 10 | + * |
| 11 | + * VERIFICATION SUMMARY (from research): |
| 12 | + * - window.close() is the primary cleanup method for JSDOM |
| 13 | + * - Asynchronous cleanup (after work is done) is more effective than immediate close |
| 14 | + * - 'resources: "usable"' causes significant memory retention due to external fetches |
| 15 | + * - The fixes in clipper.ts and youtubeExtractor.ts properly wrap JSDOM usage |
| 16 | + * in try/finally blocks, ensuring cleanup even on errors |
| 17 | + * |
| 18 | + * References: |
| 19 | + * - https://github.com/jsdom/jsdom/issues/1682 (window.close() timing) |
| 20 | + * - https://github.com/jsdom/jsdom/issues/1493 (resources: usable leaks) |
| 21 | + */ |
| 22 | + |
| 23 | +import { JSDOM, VirtualConsole } from "jsdom"; |
| 24 | + |
/**
 * Renders a byte count as mebibytes with two decimal places.
 *
 * @param bytes - Size in bytes; negative values (e.g. a shrinking heap delta)
 *   are formatted with a leading minus sign.
 * @returns The size as text such as `"12.34 MB"`.
 */
function formatBytes(bytes: number): string {
  const bytesPerMegabyte = 1024 * 1024;
  const megabytes = bytes / bytesPerMegabyte;
  return megabytes.toFixed(2) + " MB";
}
| 29 | + |
/**
 * Samples the current process memory counters.
 *
 * The script reads `process.memoryUsage()` on both Bun and Node.js, so no
 * runtime detection is needed here. Only the three counters this script
 * reports are copied out, which keeps the returned shape identical on every
 * runtime instead of leaking runtime-specific extra fields to callers.
 *
 * @returns `heapUsed`, `heapTotal` and `rss`, as reported by
 *   `process.memoryUsage()`.
 */
function getMemoryUsage(): { heapUsed: number; heapTotal: number; rss: number } {
  const { heapUsed, heapTotal, rss } = process.memoryUsage();
  return { heapUsed, heapTotal, rss };
}
| 43 | + |
// Set once the "no GC hook" warning has been printed, so repeated calls stay quiet.
let gcUnavailableWarned = false;

/**
 * Requests a garbage collection so heap samples taken afterwards are not
 * inflated by collectable garbage.
 *
 * Uses `Bun.gc(true)` when running under Bun, otherwise the global `gc()`
 * function that Node.js provides when started with `--expose-gc`. The globals
 * are looked up through `globalThis` so the function type-checks whether or
 * not Bun's type declarations are installed.
 *
 * When neither hook exists the call is a no-op; a single warning is printed
 * because every measurement in this script is unreliable in that case.
 */
function forceGC(): void {
  const runtime = globalThis as {
    Bun?: { gc(force: boolean): void };
    gc?: () => void;
  };

  if (runtime.Bun !== undefined) {
    // Bun's garbage collection
    runtime.Bun.gc(true);
    return;
  }

  if (typeof runtime.gc === "function") {
    // Node with --expose-gc flag
    runtime.gc();
    return;
  }

  if (!gcUnavailableWarned) {
    gcUnavailableWarned = true;
    console.warn(
      "forceGC: no garbage-collection hook available (use Bun, or Node with --expose-gc); " +
        "heap measurements may include uncollected garbage.",
    );
  }
}
| 53 | + |
/**
 * Baseline scenario: creates `iterations` JSDOM instances, never closes them
 * during the measurement, and reports how much the used heap grew.
 *
 * The instances are kept in an array while measuring and are closed in a
 * `finally` block afterwards, so a failure mid-measurement cannot leave open
 * windows behind for the tests that run next.
 *
 * NOTE(review): because every instance stays strongly referenced, the reported
 * growth reflects retained references as much as the missing `window.close()`
 * call. Confirm this is the comparison that is intended.
 *
 * @param iterations - Number of JSDOM instances to create.
 */
async function testJSDOMWithoutCleanup(iterations: number): Promise<void> {
  console.log(`\n--- Test: JSDOM WITHOUT cleanup (${iterations} iterations) ---`);

  // Collect garbage before sampling the baseline so leftovers from start-up
  // are not counted against (or hidden inside) this test's growth figure.
  forceGC();
  await new Promise<void>((resolve) => setTimeout(resolve, 100));

  const initialMemory = getMemoryUsage();
  console.log(`Initial heap: ${formatBytes(initialMemory.heapUsed)}`);

  const htmlContent = `
    <!DOCTYPE html>
    <html>
    <head><title>Test Page</title></head>
    <body>
    <article>
    <h1>Test Article</h1>
    <p>${"Lorem ipsum dolor sit amet. ".repeat(1000)}</p>
    <img src="test.jpg" data-src="lazy.jpg">
    <pre><code>console.log("test");</code></pre>
    </article>
    </body>
    </html>
  `;

  // Simulate NOT closing JSDOM (the old behavior)
  const leakedInstances: JSDOM[] = [];
  try {
    for (let i = 0; i < iterations; i++) {
      const dom = new JSDOM(htmlContent, {
        virtualConsole: new VirtualConsole(),
      });
      // Register the instance first so the finally block can always close it.
      leakedInstances.push(dom); // Keep reference to prevent GC

      // Simulate some DOM operations
      dom.window.document.querySelectorAll("p");
      dom.window.document.querySelectorAll("img");

      // NOT calling dom.window.close() - this is the leak!
    }

    forceGC();
    await new Promise<void>((resolve) => setTimeout(resolve, 100));

    const afterMemory = getMemoryUsage();
    const increase = afterMemory.heapUsed - initialMemory.heapUsed;
    console.log(`After heap: ${formatBytes(afterMemory.heapUsed)}`);
    console.log(`Memory increase: ${formatBytes(increase)}`);
    console.log(`Per iteration: ${formatBytes(increase / iterations)}`);
  } finally {
    // Cleanup for next test, even if the measurement above threw.
    for (const dom of leakedInstances) {
      dom.window.close();
    }
    forceGC();
  }
}
| 103 | + |
/**
 * Fixed scenario: creates `iterations` JSDOM instances one after another,
 * closing each window in a `finally` block as soon as its DOM queries are
 * done, then reports how much the used heap grew overall.
 *
 * @param iterations - Number of JSDOM instances to create and close.
 */
async function testJSDOMWithCleanup(iterations: number): Promise<void> {
  console.log(`\n--- Test: JSDOM WITH cleanup (${iterations} iterations) ---`);

  // Start from a collected heap so the baseline is comparable.
  forceGC();
  await new Promise((settle) => setTimeout(settle, 100));

  const baseline = getMemoryUsage();
  console.log(`Initial heap: ${formatBytes(baseline.heapUsed)}`);

  const paragraphText = "Lorem ipsum dolor sit amet. ".repeat(1000);
  const pageMarkup = `
    <!DOCTYPE html>
    <html>
    <head><title>Test Page</title></head>
    <body>
    <article>
    <h1>Test Article</h1>
    <p>${paragraphText}</p>
    <img src="test.jpg" data-src="lazy.jpg">
    <pre><code>console.log("test");</code></pre>
    </article>
    </body>
    </html>
  `;

  // The new behavior: every window is closed once its work is finished.
  let remaining = iterations;
  while (remaining > 0) {
    remaining -= 1;
    const instance = new JSDOM(pageMarkup, {
      virtualConsole: new VirtualConsole(),
    });
    const { document } = instance.window;
    try {
      // Stand-in for real scraping work.
      document.querySelectorAll("p");
      document.querySelectorAll("img");
    } finally {
      instance.window.close();
    }
  }

  forceGC();
  await new Promise((settle) => setTimeout(settle, 100));

  const final = getMemoryUsage();
  const growth = final.heapUsed - baseline.heapUsed;
  console.log(`After heap: ${formatBytes(final.heapUsed)}`);
  console.log(`Memory increase: ${formatBytes(growth)}`);
  console.log(`Per iteration: ${formatBytes(growth / iterations)}`);
}
| 151 | + |
/**
 * Compares the heap retained by constructing one JSDOM instance with default
 * options against one constructed with `resources: "usable"` and
 * `pretendToBeVisual: true`.
 *
 * Each measurement forces a collection before the second sample, so the
 * reported figure is memory still held by the live instance rather than
 * short-lived parsing garbage. Each window is closed in a `finally` block.
 *
 * NOTE(review): the fixture below references no external resources, so this
 * can only show the construction-time cost of the options, not the cost of
 * fetched resources. Confirm that is the intent.
 */
async function testResourcesUsableOption(): Promise<void> {
  console.log(`\n--- Test: JSDOM 'resources: usable' memory impact ---`);

  forceGC();
  await new Promise<void>((resolve) => setTimeout(resolve, 100));

  const htmlContent = `
    <!DOCTYPE html>
    <html>
    <head><title>Test Page</title></head>
    <body>
    <article>
    <h1>Test Article</h1>
    <p>${"Lorem ipsum dolor sit amet. ".repeat(500)}</p>
    </article>
    </body>
    </html>
  `;

  // Builds one instance with the given options, logs the heap it retains
  // under `label`, and always closes the window afterwards.
  const measure = (
    label: string,
    options: ConstructorParameters<typeof JSDOM>[1],
  ): void => {
    const before = getMemoryUsage();
    const dom = new JSDOM(htmlContent, options);
    try {
      // Drop construction garbage so only memory held by `dom` is counted.
      forceGC();
      const after = getMemoryUsage();
      console.log(`${label}: ${formatBytes(after.heapUsed - before.heapUsed)}`);
    } finally {
      dom.window.close();
    }
  };

  // Test WITHOUT resources: usable (new behavior)
  measure("Without 'resources: usable'", {
    virtualConsole: new VirtualConsole(),
  });

  forceGC();
  await new Promise<void>((resolve) => setTimeout(resolve, 100));

  // Test WITH resources: usable (old behavior)
  measure("With 'resources: usable'", {
    virtualConsole: new VirtualConsole(),
    resources: "usable",
    pretendToBeVisual: true,
  });
}
| 194 | + |
/**
 * Creates and closes `iterations` JSDOM instances, sampling the used heap at
 * a fixed interval, and reports whether the heap drifts upward over the run.
 *
 * Drift is the average of the second half of the samples minus the average of
 * the first half. At least two samples are required; with fewer, the averages
 * would be NaN and the threshold comparison would wrongly report a stable
 * heap, so the function reports insufficient data instead.
 *
 * @param iterations - Number of create/close cycles to run. A sample is taken
 *   on every 10th cycle, starting with the first.
 */
async function runMemoryStabilityTest(iterations: number): Promise<void> {
  const sampleInterval = 10;
  const driftThresholdBytes = 5 * 1024 * 1024; // 5MB threshold

  console.log(`\n--- Test: Memory stability over ${iterations} iterations ---`);

  forceGC();
  await new Promise<void>((resolve) => setTimeout(resolve, 100));

  const htmlContent = `
    <!DOCTYPE html>
    <html>
    <head><title>Test Page</title></head>
    <body>
    <article>
    <h1>Test Article</h1>
    <p>${"Lorem ipsum dolor sit amet. ".repeat(1000)}</p>
    </article>
    </body>
    </html>
  `;

  const memorySnapshots: number[] = [];

  for (let i = 0; i < iterations; i++) {
    const dom = new JSDOM(htmlContent, {
      virtualConsole: new VirtualConsole(),
    });
    try {
      dom.window.document.querySelectorAll("p");
    } finally {
      dom.window.close();
    }

    if (i % sampleInterval === 0) {
      forceGC();
      await new Promise<void>((resolve) => setTimeout(resolve, 10));
      memorySnapshots.push(getMemoryUsage().heapUsed);
    }
  }

  console.log(`Memory over time (every ${sampleInterval} iterations):`);
  memorySnapshots.forEach((mem, idx) => {
    console.log(` Iteration ${idx * sampleInterval}: ${formatBytes(mem)}`);
  });

  // One sample (or none) cannot be split into two non-empty halves.
  if (memorySnapshots.length < 2) {
    console.log(
      `\n⚠️ Not enough samples (${memorySnapshots.length}) to estimate memory drift - ` +
        `run more than ${sampleInterval} iterations.`,
    );
    return;
  }

  const average = (values: readonly number[]): number =>
    values.reduce((sum, value) => sum + value, 0) / values.length;

  // Explicit integer midpoint; for odd counts the extra sample goes to the second half.
  const midpoint = Math.floor(memorySnapshots.length / 2);
  const avgFirst = average(memorySnapshots.slice(0, midpoint));
  const avgSecond = average(memorySnapshots.slice(midpoint));

  const drift = avgSecond - avgFirst;
  console.log(`\nMemory drift (2nd half avg - 1st half avg): ${formatBytes(drift)}`);

  if (drift > driftThresholdBytes) {
    console.log("⚠️ WARNING: Significant memory drift detected - possible leak!");
  } else {
    console.log("✅ Memory appears stable");
  }
}
| 252 | + |
/**
 * Script entry point: prints the runtime and starting RSS, runs the four
 * memory scenarios in sequence, then prints guidance on reading the results.
 */
async function main(): Promise<void> {
  const runtimeName = typeof Bun === "undefined" ? "Node.js" : "Bun";
  const startingRss = getMemoryUsage().rss;

  console.log("=== JSDOM Memory Verification Script ===");
  console.log(`Runtime: ${runtimeName}`);
  console.log(`Initial RSS: ${formatBytes(startingRss)}`);

  // The leak/no-leak comparison uses the same iteration count for both runs.
  const comparisonRuns = 50;
  await testJSDOMWithoutCleanup(comparisonRuns);
  await testJSDOMWithCleanup(comparisonRuns);

  await testResourcesUsableOption();
  await runMemoryStabilityTest(100);

  const summaryLines = [
    "\n=== Summary ===",
    "If 'WITH cleanup' shows significantly less memory increase than",
    "'WITHOUT cleanup', the window.close() fix is working correctly.",
    "\nIf memory drift is minimal in the stability test, the fixes",
    "should prevent OOM in production.",
  ];
  for (const line of summaryLines) {
    console.log(line);
  }
}
| 271 | + |
// Run the script. On failure, log the error and mark the process as failed so
// shells and CI jobs see a non-zero exit status instead of a silent success.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
0 commit comments