Skip to content

Commit 24607c3

Browse files
committed
Refactor PDF handling in the executeStep function to improve download reliability and error handling. Enhance the interception logic for PDF requests, add direct download attempts with cookie support, and streamline the saving process with better logging. Update the printToPDF step to use Playwright's page.pdf() method to generate PDFs directly from the current page.
1 parent c4508ac commit 24607c3

File tree

1 file changed

+179
-126
lines changed

1 file changed

+179
-126
lines changed

src/step-executor.ts

Lines changed: 179 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -420,92 +420,156 @@ export async function executeStep(
420420

421421
// Intercept responses to capture PDF even when displayed inline
422422
await page.route('**/*', async route => {
423-
const response = await route.fetch();
424-
const contentType = response.headers()['content-type'] || '';
425423
const url = route.request().url();
424+
const isPdfRequest = url.includes('.pdf');
426425

427-
// Check if this is a PDF response
428-
if (contentType.includes('application/pdf') || url.includes('.pdf')) {
429-
const buffer = await response.body();
430-
if (!pdfSaved && buffer.length > 0) {
431-
interceptedData.buffer = buffer;
432-
// Save immediately when intercepted
426+
// For non-PDF requests, continue normally for better performance
427+
if (!isPdfRequest) {
428+
await route.continue();
429+
return;
430+
}
431+
432+
// For PDF requests, fetch but handle timeouts gracefully
433+
try {
434+
const response = await route.fetch().catch(async (err: any) => {
435+
// If fetch fails (e.g., timeout), continue normally
436+
console.log(` 📄 Route fetch failed for ${url}: ${err.message}`);
437+
await route.continue();
438+
return null;
439+
});
440+
441+
if (!response) {
442+
return; // Already continued above
443+
}
444+
445+
const contentType = response.headers()['content-type'] || '';
446+
447+
// Check if this is a PDF response
448+
if (contentType.includes('application/pdf') || isPdfRequest) {
433449
try {
434-
fs.writeFileSync(resolvedPath, buffer);
435-
savedPath = resolvedPath;
436-
pdfSaved = true;
437-
console.log(
438-
` 📄 PDF intercepted and saved (${(buffer.length / 1024).toFixed(2)} KB) to ${resolvedPath}`
439-
);
440-
} catch (saveErr: any) {
441-
console.log(` 📄 Failed to save intercepted PDF: ${saveErr.message}`);
450+
const buffer = await response.body();
451+
if (!pdfSaved && buffer.length > 0) {
452+
interceptedData.buffer = buffer;
453+
// Save immediately when intercepted
454+
try {
455+
fs.writeFileSync(resolvedPath, buffer);
456+
savedPath = resolvedPath;
457+
pdfSaved = true;
458+
const sizeMB = (buffer.length / 1024 / 1024).toFixed(2);
459+
console.log(
460+
` 📄 PDF intercepted and saved (${sizeMB} MB) to ${resolvedPath}`
461+
);
462+
} catch (saveErr: any) {
463+
console.log(` 📄 Failed to save intercepted PDF: ${saveErr.message}`);
464+
}
465+
}
466+
} catch (bodyErr: any) {
467+
console.log(` 📄 Failed to read PDF body: ${bodyErr.message}`);
468+
// Continue even if body read fails
442469
}
443470
}
444-
}
445471

446-
// Continue with the normal response
447-
await route.fulfill({ response });
472+
// Continue with the normal response
473+
await route.fulfill({ response });
474+
} catch (err: any) {
475+
// If anything fails, continue normally
476+
console.log(` 📄 Route interception error: ${err.message}`);
477+
await route.continue();
478+
}
448479
});
449480

450481
const currentUrl = page.url();
451482
console.log(` 📄 Current URL: ${currentUrl}`);
452483

453-
// Check if we're already on a PDF URL - wait a bit for interception
484+
// Check if we're already on a PDF URL
454485
const isPdfUrl = currentUrl.includes('.pdf') || /\.pdf(\?|$)/i.test(currentUrl);
486+
487+
// If we're on a PDF URL and haven't saved yet, try direct download first (more reliable for large files)
455488
if (isPdfUrl && !pdfSaved) {
456-
// Wait a moment for route interception to catch the PDF if it's already loading
457-
await page.waitForTimeout(1000);
458-
}
459-
460-
// Try both approaches: wait for download event OR intercept response
461-
try {
462-
// Reload the page to trigger route interception (unless already saved)
463-
const [response, download] = await Promise.all([
464-
!pdfSaved ? page.reload({ waitUntil: 'networkidle' }).catch(() => null) : Promise.resolve(null),
465-
page.waitForEvent('download', { timeout: 5000 }).catch(() => null)
466-
]);
467-
468-
if (download) {
469-
// If download event occurred, save it
470-
await download.saveAs(resolvedPath);
471-
savedPath = resolvedPath;
472-
pdfSaved = true;
473-
console.log(` 📄 PDF saved via download event to ${resolvedPath}`);
474-
} else if (response) {
475-
// Check if the response itself is a PDF
476-
const contentType = response.headers()['content-type'] || '';
477-
if (contentType.includes('application/pdf') && !pdfSaved) {
478-
const buffer = await response.body();
479-
if (buffer.length > 0) {
480-
fs.writeFileSync(resolvedPath, buffer);
481-
savedPath = resolvedPath;
482-
pdfSaved = true;
483-
console.log(
484-
` 📄 PDF saved via response body (${(buffer.length / 1024).toFixed(2)} KB) to ${resolvedPath}`
485-
);
489+
try {
490+
console.log(` 📄 Detected PDF URL, attempting direct download...`);
491+
const ctx = page.context();
492+
const cookies = await ctx.cookies(currentUrl);
493+
const cookieHeader = cookies.map(c => `${c.name}=${c.value}`).join('; ');
494+
const api = await request.newContext({
495+
extraHTTPHeaders: {
496+
...(cookieHeader ? { Cookie: cookieHeader } : {}),
497+
Referer: currentUrl,
498+
'User-Agent': 'Mozilla/5.0'
486499
}
500+
});
501+
502+
// Use a fixed 5-minute timeout to accommodate large PDFs (step.wait is not applied to this request)
503+
const res = await api.get(currentUrl, {
504+
timeout: 300000
505+
});
506+
if (res.ok()) {
507+
const buffer = await res.body();
508+
fs.writeFileSync(resolvedPath, buffer);
509+
savedPath = resolvedPath;
510+
pdfSaved = true;
511+
const sizeMB = (buffer.length / 1024 / 1024).toFixed(2);
512+
console.log(` 📄 PDF downloaded directly (${sizeMB} MB) to ${resolvedPath}`);
513+
await api.dispose();
487514
} else {
488-
// Wait a bit for route interception to capture it
489-
await page.waitForTimeout(2000);
490-
if (interceptedData.buffer && !pdfSaved && interceptedData.buffer.length > 0) {
491-
fs.writeFileSync(resolvedPath, interceptedData.buffer);
492-
savedPath = resolvedPath;
493-
pdfSaved = true;
494-
console.log(
495-
` 📄 PDF saved via intercepted response (${(interceptedData.buffer.length / 1024).toFixed(2)} KB) to ${resolvedPath}`
496-
);
497-
}
515+
await api.dispose();
516+
console.log(` 📄 Direct download failed: ${res.status()} ${res.statusText()}`);
498517
}
499-
} else if (interceptedData.buffer && !pdfSaved && interceptedData.buffer.length > 0) {
500-
// Fallback: use intercepted buffer
501-
fs.writeFileSync(resolvedPath, interceptedData.buffer);
502-
savedPath = resolvedPath;
503-
pdfSaved = true;
504-
console.log(
505-
` 📄 PDF saved via intercepted response (${(interceptedData.buffer.length / 1024).toFixed(2)} KB) to ${resolvedPath}`
506-
);
518+
} catch (directErr: any) {
519+
console.log(` 📄 Direct download failed: ${directErr.message}, trying route interception...`);
507520
}
508-
} catch (error: any) {
521+
}
522+
523+
// Try both approaches: wait for download event OR intercept response (unless already saved)
524+
if (!pdfSaved) {
525+
try {
526+
// Reload the page to trigger route interception
527+
const [response, download] = await Promise.all([
528+
page.reload({ waitUntil: 'networkidle', timeout: step.wait ?? 60000 }).catch(() => null),
529+
page.waitForEvent('download', { timeout: 5000 }).catch(() => null)
530+
]);
531+
532+
if (download) {
533+
// If download event occurred, save it
534+
await download.saveAs(resolvedPath);
535+
savedPath = resolvedPath;
536+
pdfSaved = true;
537+
console.log(` 📄 PDF saved via download event to ${resolvedPath}`);
538+
} else if (response) {
539+
// Check if the response itself is a PDF
540+
const contentType = response.headers()['content-type'] || '';
541+
if (contentType.includes('application/pdf') && !pdfSaved) {
542+
const buffer = await response.body();
543+
if (buffer.length > 0) {
544+
fs.writeFileSync(resolvedPath, buffer);
545+
savedPath = resolvedPath;
546+
pdfSaved = true;
547+
console.log(
548+
` 📄 PDF saved via response body (${(buffer.length / 1024).toFixed(2)} KB) to ${resolvedPath}`
549+
);
550+
}
551+
} else {
552+
// Wait a bit for route interception to capture it
553+
await page.waitForTimeout(2000);
554+
if (interceptedData.buffer && !pdfSaved && interceptedData.buffer.length > 0) {
555+
fs.writeFileSync(resolvedPath, interceptedData.buffer);
556+
savedPath = resolvedPath;
557+
pdfSaved = true;
558+
console.log(
559+
` 📄 PDF saved via intercepted response (${(interceptedData.buffer.length / 1024).toFixed(2)} KB) to ${resolvedPath}`
560+
);
561+
}
562+
}
563+
} else if (interceptedData.buffer && !pdfSaved && interceptedData.buffer.length > 0) {
564+
// Fallback: use intercepted buffer
565+
fs.writeFileSync(resolvedPath, interceptedData.buffer);
566+
savedPath = resolvedPath;
567+
pdfSaved = true;
568+
console.log(
569+
` 📄 PDF saved via intercepted response (${(interceptedData.buffer.length / 1024).toFixed(2)} KB) to ${resolvedPath}`
570+
);
571+
}
572+
} catch (error: any) {
509573
console.log(` 📄 Error during PDF save: ${error.message}`);
510574
// Still try to save intercepted buffer if available
511575
if (interceptedData.buffer && !pdfSaved && interceptedData.buffer.length > 0) {
@@ -516,6 +580,7 @@ export async function executeStep(
516580
` 📄 PDF saved via intercepted response (${(interceptedData.buffer.length / 1024).toFixed(2)} KB) to ${resolvedPath}`
517581
);
518582
}
583+
}
519584
}
520585
} catch (err: any) {
521586
console.log(` 📄 savePDF failed: ${err.message}`);
@@ -540,7 +605,7 @@ export async function executeStep(
540605
break;
541606
}
542607
case 'printToPDF': {
543-
// Click button to open print dialog and save as PDF
608+
// Print current page as PDF using Playwright's page.pdf()
544609
if (!step.value) {
545610
throw new Error(`printToPDF step ${step.id} requires 'value' as target filepath`);
546611
}
@@ -549,68 +614,56 @@ export async function executeStep(
549614
let savedPath: string | null = null;
550615

551616
try {
552-
// Check if element exists first
553-
const locator = locatorFor(page, step.object_type as SelectorType | undefined, step.object ?? '');
554-
const count = await locator.count();
555-
556-
if (count === 0) {
557-
console.log(` ⚠️ Element not found: ${step.object} - skipping printToPDF action`);
558-
return;
559-
}
560-
561-
console.log(` 🖨️ Attempting to print PDF from element: ${step.object}`);
562-
563-
// Set up download listener with shorter timeout
564-
const downloadPromise = page.waitForEvent('download', { timeout: 10000 }).catch(() => null);
565-
566-
// Click the button that opens print dialog
567-
await locator.click();
568-
console.log(` 🖨️ Clicked print button`);
569-
570-
// Wait a moment for print dialog to appear
571-
await page.waitForTimeout(2000);
572-
573-
// Try multiple approaches to handle print dialog
574-
let download = null;
575-
576-
try {
577-
// Approach 1: Try keyboard shortcuts
578-
console.log(` 🖨️ Trying keyboard shortcuts (Ctrl+P)`);
579-
await page.keyboard.press('Control+P');
580-
await page.waitForTimeout(2000);
581-
await page.keyboard.press('Enter');
582-
583-
// Wait for download with shorter timeout
584-
download = await downloadPromise;
585-
} catch (keyboardErr: any) {
586-
console.log(` 🖨️ Keyboard shortcuts failed: ${keyboardErr.message}`);
587-
588-
// Approach 2: Try clicking the print button again if it's still there
617+
// If object is provided, click it first (for backward compatibility)
618+
if (step.object) {
589619
try {
590-
console.log(` 🖨️ Trying direct print button click`);
591-
await locator.click();
592-
await page.waitForTimeout(3000);
593-
download = await page.waitForEvent('download', { timeout: 5000 }).catch(() => null);
620+
const locator = locatorFor(page, step.object_type as SelectorType | undefined, step.object);
621+
const count = await locator.count();
622+
623+
if (count > 0) {
624+
await locator.click();
625+
console.log(` 🖨️ Clicked element: ${step.object}`);
626+
// Wait a bit for any navigation or content changes
627+
await page.waitForTimeout(step.wait ?? 1000);
628+
} else {
629+
console.log(` ⚠️ Element not found: ${step.object} - proceeding to print current page`);
630+
}
594631
} catch (clickErr: any) {
595-
console.log(` 🖨Direct click also failed: ${clickErr.message}`);
632+
console.log(` Failed to click element ${step.object}: ${clickErr.message} - proceeding to print current page`);
596633
}
597634
}
635+
636+
// Ensure the page finished loading
637+
try {
638+
await page.waitForLoadState('networkidle', { timeout: step.wait ?? 10000 });
639+
} catch {}
640+
641+
// Resolve file path with placeholders
642+
const targetPathBase: string = step.value as string;
643+
const resolvedPath: string = replaceDataPlaceholders(targetPathBase, collector) || targetPathBase;
644+
const dir = path.dirname(resolvedPath);
645+
if (!fs.existsSync(dir)) {
646+
fs.mkdirSync(dir, { recursive: true });
647+
}
648+
649+
console.log(` 🖨️ Generating PDF from current page...`);
598650

599-
if (download) {
600-
// Ensure directory exists
601-
const savePath = step.value;
602-
const dir = path.dirname(savePath);
603-
if (!fs.existsSync(dir)) {
604-
fs.mkdirSync(dir, { recursive: true });
651+
// Use Playwright's page.pdf() to generate PDF
652+
const pdfBuffer = await page.pdf({
653+
format: 'A4',
654+
printBackground: true,
655+
margin: {
656+
top: '0.5in',
657+
right: '0.5in',
658+
bottom: '0.5in',
659+
left: '0.5in'
605660
}
606-
607-
// Save the downloaded file
608-
await download.saveAs(savePath);
609-
savedPath = savePath;
610-
console.log(` 🖨️ Print PDF saved to ${savePath}`);
611-
} else {
612-
console.log(` 🖨️ No download event detected - print dialog may not have worked`);
613-
}
661+
});
662+
663+
// Save the PDF buffer to file
664+
fs.writeFileSync(resolvedPath, pdfBuffer);
665+
savedPath = resolvedPath;
666+
console.log(` 🖨️ PDF saved to ${resolvedPath} (${(pdfBuffer.length / 1024).toFixed(2)} KB)`);
614667

615668
} catch (err: any) {
616669
console.log(` 🖨️ PrintToPDF failed: ${err.message}`);

0 commit comments

Comments
 (0)