|
| 1 | +import type { NextApiRequest, NextApiResponse } from "next"; |
| 2 | +import { marked } from "marked"; |
| 3 | + |
// Trusted hosts for markdown sourcing. Outside development, the handler
// refuses to fetch from any hostname that is not listed here.
const ALLOWED_HOSTNAMES: string[] = [
  "langfuse.com",
  "raw.githubusercontent.com",
  "github.com",
];
| 10 | + |
/**
 * Strip trailing `[#anchor-id]` markers from Markdown ATX headings.
 *
 * The markers are useful for web navigation but are just noise in a PDF.
 * Only lines that start with one to six `#` characters followed by
 * whitespace are touched, so bracketed text elsewhere in the document
 * (for example `items[#0]` in prose or code samples) is left intact.
 *
 * @param content - Raw markdown text.
 * @returns The markdown with heading anchor markers (and any spaces or tabs
 *   around them at the end of the heading line) removed.
 */
function removeAnchorTags(content: string): string {
  // Group 1 captures the heading up to (not including) the whitespace that
  // precedes the anchor. `m` makes ^/$ work per line; `$` also matches before
  // `\r`, so CRLF line endings are preserved.
  const headingAnchor = /^( {0,3}#{1,6}[ \t]+.*?)[ \t]*\[#[\w-]+\][ \t]*$/gm;
  return content.replace(headingAnchor, "$1");
}
| 19 | + |
/**
 * Rewrite MDX `<Callout type="...">...</Callout>` elements as plain HTML divs.
 *
 * Each matched element becomes `<div class="callout callout-TYPE">` wrapping
 * the original inner content unchanged. The type may be any run of word
 * characters; the stylesheet embedded by the handler in this file has rules
 * for info, warn, warning, error and danger. Elements without a `type`
 * attribute are not matched and pass through untouched.
 *
 * @param content - Text that may contain Callout elements (may span lines).
 * @returns The text with every matched Callout replaced by a styled div.
 */
function processCallouts(content: string): string {
  // Lazy inner match so adjacent callouts are converted one at a time.
  const pattern = /<Callout\s+type=["'](\w+)["']\s*>([\s\S]*?)<\/Callout>/g;

  const toDiv = (_whole: string, kind: string, body: string): string =>
    '<div class="callout callout-' + kind + '">' + body + "</div>";

  return content.replace(pattern, toDiv);
}
| 35 | + |
/**
 * API route that fetches a remote markdown document and returns it as a PDF.
 *
 * Query parameters:
 * - `url` (required): absolute URL of the markdown file. Outside development
 *   its hostname, and the hostname of every redirect hop, must be in
 *   `ALLOWED_HOSTNAMES`.
 * - `disposition` (optional): `"download"` sends the PDF as an attachment;
 *   any other value serves it inline.
 *
 * Responses:
 * - 200 with `application/pdf` on success.
 * - 400 for a missing/invalid URL or a host that is not permitted.
 * - The upstream status code when the markdown fetch is not OK.
 * - 500 for any unexpected failure.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse
) {
  try {
    const { url, disposition } = req.query;

    if (!url || typeof url !== "string") {
      return res.status(400).json({
        error: "Missing or invalid 'url' query parameter",
      });
    }

    let markdownUrl: URL;
    try {
      markdownUrl = new URL(url);
    } catch {
      return res.status(400).json({
        error: "Invalid URL format",
      });
    }

    // Check hostname against allow-list to prevent SSRF in production.
    if (!isPermittedSource(markdownUrl)) {
      return res.status(400).json({
        error: `Fetching from ${markdownUrl.hostname} is not permitted.`,
        allowed: ALLOWED_HOSTNAMES,
      });
    }

    // Redirects are followed manually so an allow-listed host cannot bounce
    // the request to an arbitrary (e.g. internal) host.
    const fetched = await fetchFromPermittedSource(markdownUrl);
    if (fetched.kind === "blocked") {
      return res.status(400).json({
        error: `Fetching from ${fetched.hostname} is not permitted.`,
        allowed: ALLOWED_HOSTNAMES,
      });
    }
    const response = fetched.response;

    if (!response.ok) {
      return res.status(response.status).json({
        error: `Failed to fetch markdown: ${response.statusText}`,
      });
    }

    let markdownContent = await response.text();

    // Strip frontmatter (YAML between --- delimiters)
    markdownContent = markdownContent.replace(
      /^---\r?\n[\s\S]*?\r?\n---\r?\n/,
      ""
    );

    // Remove anchor tags from headings (not needed in PDF)
    markdownContent = removeAnchorTags(markdownContent);

    // Convert markdown to HTML, then turn Callout components into styled divs
    let htmlContent = await marked.parse(markdownContent);
    htmlContent = processCallouts(htmlContent);

    const fullHtml = buildPdfHtml(
      markdownUrl.toString(),
      new Date().toISOString(),
      htmlContent
    );

    // Use local Chrome for development, serverless Chromium for production
    const isDev = process.env.NODE_ENV === "development";

    let browser;
    if (isDev) {
      // Use puppeteer with bundled Chromium for local development
      const puppeteer = await import("puppeteer");
      browser = await puppeteer.default.launch({
        headless: true,
        args: ["--no-sandbox", "--disable-setuid-sandbox"],
      });
    } else {
      // Use puppeteer-core with serverless Chromium for production
      const puppeteerCore = await import("puppeteer-core");
      const chromium = await import("@sparticuz/chromium");
      browser = await puppeteerCore.default.launch({
        args: chromium.default.args,
        executablePath: await chromium.default.executablePath(),
        headless: true,
      });
    }

    try {
      const page = await browser.newPage();

      // marked passes raw HTML through unsanitized, so the fetched document
      // may contain <script> or inline handlers. A static PDF needs no
      // scripting; disable it so untrusted code never runs server-side.
      // NOTE(review): images/stylesheets referenced by the document are still
      // fetched by Chromium; add request interception if that must be limited.
      await page.setJavaScriptEnabled(false);
      await page.setContent(fullHtml, { waitUntil: "networkidle0" });

      const pdf = await page.pdf({
        format: "A4",
        printBackground: true,
        margin: {
          top: "1cm",
          right: "1cm",
          bottom: "1cm",
          left: "1cm",
        },
      });

      const pdfFilename = toPdfFilename(markdownUrl.pathname);

      // Determine content disposition (default to inline)
      const contentDisposition =
        disposition === "download" ? "attachment" : "inline";

      res.setHeader("Content-Type", "application/pdf");
      res.setHeader(
        "Content-Disposition",
        `${contentDisposition}; filename="${pdfFilename}"`
      );
      res.setHeader("Content-Length", pdf.length);
      // Cache for 60 seconds on CDN, serve stale while revalidating for 24 hours
      res.setHeader(
        "Cache-Control",
        "public, s-maxage=60, stale-while-revalidate=86400"
      );

      res.status(200).end(pdf);
    } finally {
      // Always close the browser. A failure to close must not surface as a
      // request error, because the PDF may already have been sent.
      try {
        await browser.close();
      } catch (closeError) {
        console.error("Error closing browser:", closeError);
      }
    }
  } catch (error) {
    console.error("Error generating PDF:", error);
    // Writing a second response after headers went out would itself throw.
    if (res.headersSent) {
      return;
    }
    res.status(500).json({
      error: "Internal server error while generating PDF",
      message: "An unexpected error occurred.",
    });
  }
}

/** Outcome of fetching the markdown source with per-hop host validation. */
type SourceFetchResult =
  | { kind: "response"; response: Response }
  | { kind: "blocked"; hostname: string };

/** True when `target` may be fetched: any URL in development, otherwise only allow-listed hostnames. */
function isPermittedSource(target: URL): boolean {
  // The allow-list is skipped in dev to allow tests against a dev server.
  if (process.env.NODE_ENV === "development") {
    return true;
  }
  return ALLOWED_HOSTNAMES.includes(target.hostname);
}

/** Fetch `start`, following at most five redirects and validating each hop with `isPermittedSource`. */
async function fetchFromPermittedSource(
  start: URL
): Promise<SourceFetchResult> {
  const maxRedirects = 5;
  let current = start;

  for (let hop = 0; hop <= maxRedirects; hop += 1) {
    const response = await fetch(current.toString(), { redirect: "manual" });
    const location = response.headers.get("location");
    const isRedirect = response.status >= 300 && response.status < 400;

    if (!isRedirect || location === null) {
      return { kind: "response", response };
    }

    // The redirect body is not needed; release the connection.
    await response.body?.cancel();

    // Location may be relative, so resolve it against the current URL.
    const next = new URL(location, current);
    if (!isPermittedSource(next)) {
      return { kind: "blocked", hostname: next.hostname };
    }
    current = next;
  }

  throw new Error(`Too many redirects while fetching ${start.toString()}`);
}

/** Escape text for safe interpolation into HTML element content or attribute values. */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/** Derive the download filename from a URL pathname, always ending in `.pdf`. */
function toPdfFilename(pathname: string): string {
  const lastSegment = pathname.split("/").pop() || "document.md";
  const renamed = lastSegment.replace(/\.mdx?$/i, ".pdf");
  const withExtension = /\.pdf$/i.test(renamed) ? renamed : `${renamed}.pdf`;
  // Keep the quoted-string in Content-Disposition well-formed.
  return withExtension.replace(/["\\\r\n]/g, "_");
}

/** Wrap rendered markdown HTML in a complete, styled HTML document with a source/timestamp header. */
function buildPdfHtml(
  sourceUrl: string,
  createdAt: string,
  bodyHtml: string
): string {
  return `
    <!DOCTYPE html>
    <html>
      <head>
        <meta charset="UTF-8">
        <style>
          body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
          }
          h1, h2, h3, h4, h5, h6 {
            margin-top: 24px;
            margin-bottom: 16px;
            font-weight: 600;
            line-height: 1.25;
          }
          h1 { font-size: 2em; border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
          h2 { font-size: 1.5em; border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
          h3 { font-size: 1.25em; }
          h4 { font-size: 1em; }
          h5 { font-size: 0.875em; }
          h6 { font-size: 0.85em; color: #6a737d; }
          p { margin-bottom: 16px; }
          a { color: #0366d6; text-decoration: none; }
          a:hover { text-decoration: underline; }
          code {
            background-color: rgba(27, 31, 35, 0.05);
            border-radius: 3px;
            padding: 0.2em 0.4em;
            font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
            font-size: 85%;
          }
          pre {
            background-color: #f6f8fa;
            border-radius: 3px;
            padding: 16px;
            overflow: auto;
            line-height: 1.45;
          }
          pre code {
            background-color: transparent;
            padding: 0;
          }
          blockquote {
            border-left: 4px solid #dfe2e5;
            padding-left: 16px;
            color: #6a737d;
            margin-left: 0;
          }
          ul, ol {
            margin-bottom: 16px;
            padding-left: 2em;
          }
          li {
            margin-bottom: 4px;
          }
          table {
            border-collapse: collapse;
            width: 100%;
            margin-bottom: 16px;
          }
          table th, table td {
            border: 1px solid #dfe2e5;
            padding: 6px 13px;
          }
          table th {
            background-color: #f6f8fa;
            font-weight: 600;
          }
          img {
            max-width: 100%;
            height: auto;
          }
          hr {
            border: 0;
            border-top: 1px solid #eaecef;
            margin: 24px 0;
          }
          .source-url {
            color: #6a737d;
            font-size: 0.875em;
            padding: 12px 0;
            margin-bottom: 24px;
            border-bottom: 2px solid #eaecef;
            word-break: break-all;
          }
          .source-url strong {
            color: #24292e;
            font-weight: 600;
          }
          /* Callout component styles */
          .callout {
            padding: 16px;
            margin: 16px 0;
            border-radius: 6px;
            border-left: 4px solid;
            background-color: #f6f8fa;
            page-break-inside: avoid;
          }
          .callout p:first-child {
            margin-top: 0;
          }
          .callout p:last-child {
            margin-bottom: 0;
          }
          .callout-info {
            border-left-color: #0969da;
            background-color: #ddf4ff;
          }
          .callout-warn,
          .callout-warning {
            border-left-color: #d4a72c;
            background-color: #fff8dc;
          }
          .callout-error,
          .callout-danger {
            border-left-color: #cf222e;
            background-color: #ffebe9;
          }
        </style>
      </head>
      <body>
        <div class="source-url">
          <strong>Source:</strong> ${escapeHtml(sourceUrl)}<br/>
          <strong>PDF created at:</strong> ${escapeHtml(createdAt)}
        </div>
        ${bodyHtml}
      </body>
    </html>
  `;
}
0 commit comments