Skip to content

Commit ea62dcc

Browse files
marcklingen, github-advanced-security[bot], ellipsis-dev[bot]
authored
feat: add /api/md-to-pdf route to allow downloading of markdown files from docs (#2151)
* feat: add /api/md-to-pdf route to allow downloading of markdown files from docs
* Potential fix for code scanning alert no. 28: Server-side request forgery
  Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
* add langfuse.com
* cdn cache headers
* skip dev
* add to privacy
* make a bit nicer
* add to cookie policy
* push
* Update pages/api/md-to-pdf.ts
  Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
* fix
* skip link validation

---------

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
1 parent d22d1d2 commit ea62dcc

File tree

7 files changed

+850
-2
lines changed

7 files changed

+850
-2
lines changed

components/MainContentWrapper.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ const pathsWithCopyAsMarkdownButton = [
3232
"/faq",
3333
"/integrations",
3434
"/handbook",
35+
"/security",
3536
];
3637
const isCustomerStory = (pathname: string) =>
3738
pathname.startsWith("/customers/");

package.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
"@radix-ui/react-use-controllable-state": "^1.2.2",
5555
"@react-three/drei": "^9.120.4",
5656
"@react-three/fiber": "^8.17.10",
57+
"@sparticuz/chromium": "^140.0.0",
5758
"@supabase/supabase-js": "^2.47.10",
5859
"@tanstack/react-table": "^8.20.6",
5960
"@vercel/functions": "^2.2.2",
@@ -73,6 +74,7 @@
7374
"katex": "^0.16.22",
7475
"langfuse": "^3.38.4",
7576
"lucide-react": "^0.469.0",
77+
"marked": "^16.3.0",
7678
"nanoid": "^5.1.5",
7779
"next": "^15.2.4",
7880
"next-sitemap": "^4.2.3",
@@ -82,6 +84,7 @@
8284
"postcss": "^8.4.49",
8385
"posthog-js": "^1.203.1",
8486
"posthog-node": "^5.1.1",
87+
"puppeteer-core": "^24.23.0",
8588
"react": "^18.3.1",
8689
"react-country-flag": "^3.1.0",
8790
"react-dom": "^18.3.1",
@@ -105,6 +108,7 @@
105108
"@types/react-syntax-highlighter": "^15.5.13",
106109
"cross-env": "^7.0.3",
107110
"markdown-link-check": "^3.13.6",
111+
"puppeteer": "^24.23.0",
108112
"typescript": "^5.6.3",
109113
"xml2js": "^0.6.2"
110114
},

pages/api/md-to-pdf.ts

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
import type { NextApiRequest, NextApiResponse } from "next";
2+
import { marked } from "marked";
3+
4+
/**
 * Hostnames the route is willing to fetch markdown from.
 * Requests whose URL hostname is not an exact match for one of these
 * entries are rejected outside of development.
 */
const ALLOWED_HOSTNAMES: string[] = [
  "langfuse.com",
  "raw.githubusercontent.com",
  "github.com",
];
10+
11+
/**
 * Strip custom heading anchors (e.g. `## Title [#anchor-id]`) from markdown.
 *
 * The anchors are only useful for web navigation and would otherwise show up
 * as literal text in the PDF. Only a trailing `[#id]` on an ATX heading line
 * (`#` … `######`) is removed; bracketed `#` text elsewhere, such as an issue
 * link `[#123](https://…)`, is left untouched.
 *
 * @param content - Raw markdown source.
 * @returns The markdown with trailing heading anchors removed.
 */
function removeAnchorTags(content: string): string {
  // `m` flag: ^ and $ match per line, so CRLF and LF input both work.
  // Group 1 keeps the heading text; the anchor and the spaces around it are dropped.
  return content.replace(
    /^([ \t]{0,3}#{1,6}[ \t].*?)[ \t]*\[#[\w-]+\][ \t]*$/gm,
    "$1"
  );
}
19+
20+
/**
21+
* Process MDX Callout components and convert them to HTML divs
22+
* Supports types: info, warn, warning, error, danger
23+
*/
24+
function processCallouts(content: string): string {
25+
// Match <Callout type="...">...</Callout> (including self-closing and multiline)
26+
const calloutRegex =
27+
/<Callout\s+type=["'](\w+)["']\s*>([\s\S]*?)<\/Callout>/g;
28+
29+
return content.replace(calloutRegex, (match, type, innerContent) => {
30+
// The innerContent might contain markdown that will be processed later
31+
// Wrap it in a special div that we'll style
32+
return `<div class="callout callout-${type}">${innerContent}</div>`;
33+
});
34+
}
35+
36+
/** Maximum number of HTTP redirects followed while fetching the markdown source. */
const MAX_MARKDOWN_REDIRECTS = 5;

/** Outcome of fetching the markdown source: a response, or a refusal with a reason. */
type MarkdownFetchResult =
  | { kind: "response"; response: Response }
  | { kind: "rejected"; reason: string };

/** Escape text so it can be interpolated into HTML without being parsed as markup. */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Explain why a URL may not be fetched, or return null when it is permitted.
 * All checks are skipped in development so the route can be tested against a dev server.
 */
function describeFetchRejection(target: URL): string | null {
  if (process.env.NODE_ENV === "development") {
    return null;
  }
  if (target.protocol !== "https:" && target.protocol !== "http:") {
    return `Fetching via ${target.protocol} is not permitted.`;
  }
  if (!ALLOWED_HOSTNAMES.includes(target.hostname)) {
    return `Fetching from ${target.hostname} is not permitted.`;
  }
  return null;
}

/**
 * Fetch the markdown source, validating the initial URL and every redirect hop.
 * Redirects are followed manually because an automatic redirect would let an
 * allow-listed host forward the request to a host that is not on the allow-list.
 */
async function fetchMarkdownSource(startUrl: URL): Promise<MarkdownFetchResult> {
  let target = startUrl;

  for (let redirects = 0; redirects <= MAX_MARKDOWN_REDIRECTS; redirects++) {
    const reason = describeFetchRejection(target);
    if (reason !== null) {
      return { kind: "rejected", reason };
    }

    const response = await fetch(target.toString(), { redirect: "manual" });
    const location = response.headers.get("location");
    if (location === null || response.status < 300 || response.status >= 400) {
      return { kind: "response", response };
    }

    // Discard the redirect body so the underlying connection can be released.
    await response.body?.cancel();
    try {
      target = new URL(location, target);
    } catch {
      return { kind: "rejected", reason: "Invalid redirect target." };
    }
  }

  return {
    kind: "rejected",
    reason: "Too many redirects while fetching markdown.",
  };
}

/** Wrap the rendered markdown in a standalone, styled HTML document for printing. */
function buildPdfHtml(sourceUrl: string, htmlContent: string): string {
  return `
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <style>
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
      line-height: 1.6;
      color: #333;
      max-width: 800px;
      margin: 0 auto;
      padding: 20px;
    }
    h1, h2, h3, h4, h5, h6 {
      margin-top: 24px;
      margin-bottom: 16px;
      font-weight: 600;
      line-height: 1.25;
    }
    h1 { font-size: 2em; border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
    h2 { font-size: 1.5em; border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
    h3 { font-size: 1.25em; }
    h4 { font-size: 1em; }
    h5 { font-size: 0.875em; }
    h6 { font-size: 0.85em; color: #6a737d; }
    p { margin-bottom: 16px; }
    a { color: #0366d6; text-decoration: none; }
    a:hover { text-decoration: underline; }
    code {
      background-color: rgba(27, 31, 35, 0.05);
      border-radius: 3px;
      padding: 0.2em 0.4em;
      font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
      font-size: 85%;
    }
    pre {
      background-color: #f6f8fa;
      border-radius: 3px;
      padding: 16px;
      overflow: auto;
      line-height: 1.45;
    }
    pre code {
      background-color: transparent;
      padding: 0;
    }
    blockquote {
      border-left: 4px solid #dfe2e5;
      padding-left: 16px;
      color: #6a737d;
      margin-left: 0;
    }
    ul, ol {
      margin-bottom: 16px;
      padding-left: 2em;
    }
    li {
      margin-bottom: 4px;
    }
    table {
      border-collapse: collapse;
      width: 100%;
      margin-bottom: 16px;
    }
    table th, table td {
      border: 1px solid #dfe2e5;
      padding: 6px 13px;
    }
    table th {
      background-color: #f6f8fa;
      font-weight: 600;
    }
    img {
      max-width: 100%;
      height: auto;
    }
    hr {
      border: 0;
      border-top: 1px solid #eaecef;
      margin: 24px 0;
    }
    .source-url {
      color: #6a737d;
      font-size: 0.875em;
      padding: 12px 0;
      margin-bottom: 24px;
      border-bottom: 2px solid #eaecef;
      word-break: break-all;
    }
    .source-url strong {
      color: #24292e;
      font-weight: 600;
    }
    /* Callout component styles */
    .callout {
      padding: 16px;
      margin: 16px 0;
      border-radius: 6px;
      border-left: 4px solid;
      background-color: #f6f8fa;
      page-break-inside: avoid;
    }
    .callout p:first-child {
      margin-top: 0;
    }
    .callout p:last-child {
      margin-bottom: 0;
    }
    .callout-info {
      border-left-color: #0969da;
      background-color: #ddf4ff;
    }
    .callout-warn,
    .callout-warning {
      border-left-color: #d4a72c;
      background-color: #fff8dc;
    }
    .callout-error,
    .callout-danger {
      border-left-color: #cf222e;
      background-color: #ffebe9;
    }
  </style>
</head>
<body>
  <div class="source-url">
    <strong>Source:</strong> ${escapeHtml(sourceUrl)}<br/>
    <strong>PDF created at:</strong> ${new Date().toISOString()}
  </div>
  ${htmlContent}
</body>
</html>
`;
}

/**
 * Launch a headless browser for PDF rendering: puppeteer's own Chrome in
 * development, puppeteer-core with @sparticuz/chromium everywhere else.
 */
async function launchPdfBrowser() {
  if (process.env.NODE_ENV === "development") {
    const puppeteer = await import("puppeteer");
    return puppeteer.default.launch({
      headless: true,
      args: ["--no-sandbox", "--disable-setuid-sandbox"],
    });
  }

  const puppeteerCore = await import("puppeteer-core");
  const chromium = await import("@sparticuz/chromium");
  return puppeteerCore.default.launch({
    args: chromium.default.args,
    executablePath: await chromium.default.executablePath(),
    headless: true,
  });
}

/**
 * Derive the download filename from the URL path: take the last path segment,
 * drop a trailing .md/.mdx/.pdf extension and always end the name with ".pdf".
 */
function derivePdfFilename(pathname: string): string {
  const lastSegment = pathname.split("/").pop() || "document";
  const stem = lastSegment.replace(/\.(?:mdx?|pdf)$/i, "");
  return `${stem || "document"}.pdf`;
}

/**
 * API route: fetch a markdown document and return it rendered as a PDF.
 *
 * Query parameters:
 * - `url` (required): absolute URL of the markdown file. Outside development
 *   it must be http(s) and its hostname must be in `ALLOWED_HOSTNAMES`.
 * - `disposition` (optional): `"download"` sends the PDF as an attachment;
 *   any other value displays it inline.
 *
 * Responses: 200 with the PDF; 400 for a missing, malformed or disallowed URL;
 * the upstream status when the markdown fetch fails; 500 on unexpected errors.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse
) {
  try {
    const { url, disposition } = req.query;

    if (!url || typeof url !== "string") {
      return res.status(400).json({
        error: "Missing or invalid 'url' query parameter",
      });
    }

    let markdownUrl: URL;
    try {
      markdownUrl = new URL(url);
    } catch {
      return res.status(400).json({
        error: "Invalid URL format",
      });
    }

    // SSRF guard: the allow-list is enforced on the requested URL and on every
    // redirect hop (skipped in development).
    const fetched = await fetchMarkdownSource(markdownUrl);
    if (fetched.kind === "rejected") {
      return res.status(400).json({
        error: fetched.reason,
        allowed: ALLOWED_HOSTNAMES,
      });
    }

    const { response } = fetched;
    if (!response.ok) {
      return res.status(response.status).json({
        error: `Failed to fetch markdown: ${response.statusText}`,
      });
    }

    let markdownContent = await response.text();

    // Strip frontmatter (YAML between --- delimiters)
    markdownContent = markdownContent.replace(
      /^---\r?\n[\s\S]*?\r?\n---\r?\n/,
      ""
    );

    // Heading anchors are only useful on the web, not in the PDF
    markdownContent = removeAnchorTags(markdownContent);

    // Convert markdown to HTML, then turn Callout elements into styled divs
    const htmlContent = processCallouts(await marked.parse(markdownContent));

    const fullHtml = buildPdfHtml(markdownUrl.toString(), htmlContent);

    const browser = await launchPdfBrowser();
    try {
      const page = await browser.newPage();
      // The document is built from remotely fetched content, so never run its scripts.
      await page.setJavaScriptEnabled(false);
      await page.setContent(fullHtml, { waitUntil: "networkidle0" });

      const pdf = await page.pdf({
        format: "A4",
        printBackground: true,
        margin: {
          top: "1cm",
          right: "1cm",
          bottom: "1cm",
          left: "1cm",
        },
      });

      const pdfFilename = derivePdfFilename(markdownUrl.pathname);

      // Default to inline display; "download" forces a file download
      const contentDisposition =
        disposition === "download" ? "attachment" : "inline";

      res.setHeader("Content-Type", "application/pdf");
      res.setHeader(
        "Content-Disposition",
        `${contentDisposition}; filename="${pdfFilename}"`
      );
      res.setHeader("Content-Length", pdf.length);
      // Cache for 60 seconds on CDN, serve stale while revalidating for 24 hours
      res.setHeader(
        "Cache-Control",
        "public, s-maxage=60, stale-while-revalidate=86400"
      );

      res.status(200).end(pdf);
    } finally {
      // Ensure browser is always closed, even if an error occurs
      await browser.close();
    }
  } catch (error) {
    console.error("Error generating PDF:", error);
    // A JSON error can only be sent if the PDF response has not started yet.
    if (!res.headersSent) {
      res.status(500).json({
        error: "Internal server error while generating PDF",
        message: "An unexpected error occurred.",
      });
    }
  }
}

pages/cookie-policy.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ title: Cookie Policy
44

55
# Cookie Policy
66

7-
Last updated: May 06, 2024
7+
**Last updated: May 06, 2024 | <a href="/api/md-to-pdf?url=https://langfuse.com/cookie-policy.md" target="_blank">download as PDF</a>**
88

99
This Cookie Policy explains how **Langfuse GmbH** ("Company," "we," "us," and "our") uses cookies and similar technologies to recognize you when you visit our website at https://www.langfuse.com ("Website"). It explains what these technologies are and why we use them, as well as your rights to control our use of them.
1010

pages/privacy.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ title: Privacy Policy
44

55
# Privacy Policy
66

7-
**Last updated February 27th, 2025**
7+
**Last updated February 27th, 2025 | <a href="/api/md-to-pdf?url=https://langfuse.com/privacy.md" target="_blank">download as PDF</a>**
88

99
This privacy notice for **Langfuse GmbH** (doing business as **Langfuse**) ("**we**," "**us**," or "**our**"), describes how and why we might collect, store, use, and/or share ("**process**") your information when you use our services ("**Services**"), such as when you:
1010

0 commit comments

Comments
 (0)