Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
903 changes: 903 additions & 0 deletions ARCHITECTURE.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion jest.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ export default {

// An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
transformIgnorePatterns: [
'node_modules/(?!(@paralleldrive/cuid2|@noble|formidable|superagent|chalk|#ansi-styles|#supports-color)/)',
'node_modules/(?!(@paralleldrive/cuid2|@noble|formidable|superagent|chalk|#ansi-styles|#supports-color|puppeteer-autoscroll-down)/)',
],

// An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
"console-stamp": "^3.1.2",
"express": "^5.1.0",
"fs-extra": "^11.2.0",
"html-entities": "^2.5.2",
"pdf-lib": "^1.17.1",
"puppeteer": "^24.0.0",
"puppeteer-autoscroll-down": "^2.0.0",
"puppeteer-core": "^24.0.0",
Expand Down
2 changes: 1 addition & 1 deletion src/cli.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env node

import * as command from './command';
import * as command from './command/command';

const program = command.makeProgram();

Expand Down
18 changes: 3 additions & 15 deletions src/command.ts → src/command/command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ import {
commaSeparatedList,
generatePuppeteerPDFMargin,
} from './commander-options';
import { generatePDF, GeneratePDFOptions } from './core';
import { generatePDF, GeneratePDFOptions } from '../core';
import {
generateDocusaurusPDF,
DocusaurusOptions,
} from './provider/docusaurus';
} from '../provider/docusaurus';
import chalk from 'chalk';
import console_stamp from 'console-stamp';
import packageJson from '../package.json';
import packageJson from '../../package.json';

const version = packageJson.version;

Expand Down Expand Up @@ -182,18 +182,6 @@ export function makeProgram() {
.option(
'--openDetail',
'open details elements in the PDF, default is open',
)
.option(
'--extractIframes',
'extract and inline content from iframes (only same-origin or accessible iframes)',
)
.option(
'--httpAuthUser <username>',
'HTTP Basic Auth username for protected documentation sites',
)
.option(
'--httpAuthPassword <password>',
'HTTP Basic Auth password for protected documentation sites',
);
});

Expand Down
File renamed without changes.
83 changes: 31 additions & 52 deletions src/core.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
import chalk from 'chalk';
import console_stamp from 'console-stamp';
import * as puppeteer from 'puppeteer-core';
import { scrollPageToBottom } from 'puppeteer-autoscroll-down';
import * as fs from 'fs-extra';
import { chromeExecPath } from './browser';
import * as utils from './utils';
import { delay } from './utils';
import { PDF, PDFOptions } from './pdf/generate';

console_stamp(console);

export interface GeneratePDFOptions {
export interface GeneratePDFOptions extends PDFOptions {
initialDocURLs: Array<string>;
excludeURLs: Array<string>;
outputPDFFilename: string;
pdfMargin: puppeteer.PDFOptions['margin'];
contentSelector: string;
paginationSelector: string;
// deprecated - use paperFormat
pdfFormat?: puppeteer.PaperFormat;
paperFormat: puppeteer.PaperFormat;
excludeSelectors: Array<string>;
cssStyle: string;
puppeteerArgs: Array<string>;
Expand All @@ -28,8 +25,6 @@ export interface GeneratePDFOptions {
disableCover: boolean;
coverSub: string;
waitForRender: number;
headerTemplate: string;
footerTemplate: string;
protocolTimeout: number;
filterKeyword: string;
baseUrl: string;
Expand All @@ -42,36 +37,32 @@ export interface GeneratePDFOptions {
}

/* c8 ignore start */
export async function generatePDF({
initialDocURLs,
excludeURLs,
outputPDFFilename = 'docs-to-pdf.pdf',
pdfMargin = { top: 32, right: 32, bottom: 32, left: 32 },
contentSelector,
paginationSelector,
paperFormat,
excludeSelectors,
cssStyle,
puppeteerArgs,
coverTitle,
coverImage,
disableTOC,
tocTitle,
disableCover,
coverSub,
waitForRender,
headerTemplate,
footerTemplate,
protocolTimeout,
filterKeyword,
baseUrl,
excludePaths,
restrictPaths,
openDetail = true,
extractIframes = false,
httpAuthUser,
httpAuthPassword,
}: GeneratePDFOptions): Promise<void> {
export async function generatePDF(options: GeneratePDFOptions): Promise<void> {
const {
initialDocURLs,
excludeURLs,
contentSelector,
paginationSelector,
excludeSelectors,
cssStyle,
puppeteerArgs,
coverTitle,
coverImage,
disableTOC,
tocTitle,
disableCover,
coverSub,
waitForRender,
protocolTimeout,
filterKeyword,
baseUrl,
excludePaths,
restrictPaths,
openDetail = true,
extractIframes = false,
httpAuthUser,
httpAuthPassword,
} = options;
const execPath = process.env.PUPPETEER_EXECUTABLE_PATH ?? chromeExecPath();
console.debug(chalk.cyan(`Using Chromium from ${execPath}`));
const browser = await puppeteer.launch({
Expand Down Expand Up @@ -252,23 +243,11 @@ export async function generatePDF({
// Scroll to the bottom of the page with puppeteer-autoscroll-down
// This forces lazy-loading images to load
console.log(chalk.cyan('Scroll to the bottom of the page...'));
const { scrollPageToBottom } = await import('puppeteer-autoscroll-down');
await scrollPageToBottom(page, {}); //cast to puppeteer-core type

// Generate PDF
console.log(chalk.cyan('Generate PDF...'));
await page.pdf({
path: outputPDFFilename,
format: paperFormat,
printBackground: true,
margin: pdfMargin,
displayHeaderFooter: !!(headerTemplate || footerTemplate),
headerTemplate,
footerTemplate,
timeout: 0,
});

console.log(chalk.green(`PDF generated at ${outputPDFFilename}`));
const pdf = new PDF(options);
await pdf.generate(page);
} finally {
// Always close browser and cleanup temp directory, even if PDF generation fails
await browser.close();
Expand Down
134 changes: 134 additions & 0 deletions src/pdf/generate.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import * as puppeteer from 'puppeteer-core';
import chalk from 'chalk';
import { getOutline, setOutline, OutlineNode } from './outline';
import { PDFDocument } from 'pdf-lib';
import { writeFile } from 'fs/promises';

/**
 * Returns the number of nodes in an outline forest: every entry in
 * `outlines` counts as one heading, plus all of its descendants.
 */
function countTotalHeadings(outlines: OutlineNode[]): number {
  return outlines.reduce(
    (total, node) => total + 1 + countTotalHeadings(node.children),
    0,
  );
}

/**
 * Options read by the `PDF` class when rendering a page and saving the result.
 */
export interface PDFOptions {
/** Destination path of the generated PDF; `PDF.generate` falls back to 'output.pdf' when this is nullish. */
outputPDFFilename: string;
/** Paper format handed to `page.pdf()` as its `format` option. */
paperFormat: puppeteer.PaperFormat;
/** NOTE(review): not read by `PDF.generate`; appears to be the deprecated predecessor of `paperFormat` - confirm with callers. */
pdfFormat?: puppeteer.PaperFormat;
/** Page margins handed to `page.pdf()`; `PDF.generate` falls back to 32 on every side when this is nullish. */
pdfMargin: puppeteer.PDFOptions['margin'];
/** Header template handed to `page.pdf()`; header/footer display is enabled when this or `footerTemplate` is truthy. */
headerTemplate: string;
/** Footer template handed to `page.pdf()`; header/footer display is enabled when this or `headerTemplate` is truthy. */
footerTemplate: string;
}

/**
 * Renders a Puppeteer page to a PDF file and, when the page contains
 * headings, embeds a bookmark outline built from them.
 */
export class PDF {
  private readonly options: PDFOptions;

  constructor(options: PDFOptions) {
    this.options = options;
  }

  /**
   * Generate the PDF for `page` and write it to `options.outputPDFFilename`
   * (or 'output.pdf' when that option is nullish).
   *
   * Steps: measure the document, extract the h1-h6 outline, print the page
   * with Puppeteer, add bookmarks with pdf-lib when headings were found, and
   * save the final bytes to disk.
   *
   * @param page - Puppeteer page whose current content is printed.
   * @returns Resolves once the final PDF has been written.
   * @throws {Error} - if page.pdf() fails (logged, then re-thrown unchanged).
   *   Failures while extracting the outline, loading the PDF or writing the
   *   file also propagate to the caller.
   */
  public async generate(page: puppeteer.Page): Promise<void> {
    console.log(chalk.cyan('Generate PDF...'));

    // Resolve the destination once so rendering, saving and logging agree.
    const outputPath = this.options.outputPDFFilename ?? 'output.pdf';

    // Get page dimensions for coordinate mapping
    const pageDimensions = await page.evaluate(() => {
      return {
        width: document.documentElement.scrollWidth,
        height: document.documentElement.scrollHeight,
      };
    });

    console.log(chalk.cyan('Extracting headings from document...'));
    const outline = await this.extractOutline(page);

    const totalHeadings = countTotalHeadings(outline);
    if (totalHeadings > 0) {
      console.log(chalk.green(`✓ Found ${totalHeadings} headings`));
    } else {
      console.log(
        chalk.yellow(
          'No headings found - PDF will be generated without bookmarks',
        ),
      );
    }

    const pdf = await this.renderPage(page, outputPath);
    const pdfDoc = await PDFDocument.load(pdf);

    if (totalHeadings > 0) {
      // Get PDF page dimensions (first page, assuming all pages same size)
      const pdfPageHeight = pdfDoc.getPage(0).getHeight();

      // The trailing `true` is forwarded as-is; its meaning is defined by
      // setOutline in ./outline.
      await setOutline(
        pdfDoc,
        outline,
        pageDimensions.height,
        pdfPageHeight,
        true,
      );
      console.log(chalk.green(`✓ Created ${totalHeadings} bookmarks`));
    }

    console.log(chalk.cyan('Saving PDF...'));
    const buffer = await pdfDoc.save();
    await writeFile(outputPath, buffer);
    console.log(chalk.green(`PDF generated at ${outputPath}`));
  }

  /**
   * Extract the h1-h6 outline from the page while relaying the browser-side
   * 'Processing headings...' console messages to the Node console.
   */
  private async extractOutline(page: puppeteer.Page) {
    // Listen for console messages from the browser context (for progress updates)
    const consoleListener = (msg: { text: () => string }) => {
      const text = msg.text();
      if (text.startsWith('Processing headings...')) {
        console.log(chalk.cyan(text));
      }
    };
    page.on('console', consoleListener);

    try {
      // `return await` keeps the listener attached until extraction settles.
      return await getOutline(page, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']);
    } finally {
      // Detach even when extraction fails so the listener never leaks.
      page.off('console', consoleListener);
    }
  }

  /**
   * Print the page with Puppeteer and return the PDF bytes, logging any
   * failure before re-throwing it.
   */
  private async renderPage(page: puppeteer.Page, outputPath: string) {
    try {
      return await page.pdf({
        // NOTE(review): `path` makes Puppeteer write the un-bookmarked PDF to
        // disk before generate() overwrites it with the final document. It is
        // kept so a file still exists if post-processing fails; confirm this
        // double write is intended.
        path: outputPath,
        format: this.options.paperFormat,
        margin: this.options.pdfMargin ?? {
          top: 32,
          right: 32,
          bottom: 32,
          left: 32,
        },
        headerTemplate: this.options.headerTemplate,
        footerTemplate: this.options.footerTemplate,
        displayHeaderFooter: !!(
          this.options.headerTemplate || this.options.footerTemplate
        ),
        printBackground: true,
        timeout: 0,
      });
    } catch (err) {
      console.error(chalk.red(err));
      throw err; // Re-throw original error to preserve stack trace
    }
  }
}
Loading