Skip to content

Commit 580fd57

Browse files
authored
[TEC-386] Click to copy page as md (#2491)
* click to copy as md * llm.txt * take out extra /docs from llms.txt paths * /docs/llms.txt and /llms.txt * redirect
1 parent 11186f9 commit 580fd57

File tree

7 files changed

+437
-2
lines changed

7 files changed

+437
-2
lines changed

docusaurus.config.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,8 @@ module.exports = {
268268
],
269269
plugins: [
270270
'docusaurus-plugin-sass',
271+
'./plugins/markdown-extract',
272+
'./plugins/llms-txt',
271273
[
272274
'@docusaurus/plugin-client-redirects',
273275
{

netlify.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ from = "/"
1818
to = "/docs"
1919
status = 302
2020

21+
[[redirects]]
22+
from = "/docs/llms.txt"
23+
to = "/llms.txt"
24+
status = 301
25+
2126
[[redirects]]
2227
from = "/*"
2328
to = "/docs/404.html"

plugins/llms-txt/index.js

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
const fs = require('node:fs');
2+
const path = require('node:path');
3+
4+
/**
 * Recursively collect files under `dir` whose extension is in `extFilter`.
 *
 * Files whose name starts with `_` are skipped. A directory that does not
 * exist yields an empty list.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [extFilter] - Extensions (with leading dot) to keep.
 * @returns {string[]} Paths of matching files, each built by joining onto `dir`.
 */
function walk(dir, extFilter = ['.md', '.mdx']) {
  if (!fs.existsSync(dir)) {
    return [];
  }

  return fs.readdirSync(dir, {withFileTypes: true}).flatMap((entry) => {
    const entryPath = path.join(dir, entry.name);

    // Sub-directories contribute their own matches at this position.
    if (entry.isDirectory()) {
      return walk(entryPath, extFilter);
    }

    const isUnderscored = entry.name.startsWith('_');
    const hasWantedExt = extFilter.includes(path.extname(entry.name));
    return !isUnderscored && hasWantedExt ? [entryPath] : [];
  });
}
26+
27+
/**
 * Derive a doc ID from a path relative to the docs directory.
 *
 * The `.md` / `.mdx` extension is dropped, an `index` file maps to its parent
 * directory (or to `index` when it has no parent), and backslashes are
 * converted to forward slashes.
 *
 * @param {string} relPath - File path relative to the docs directory.
 * @returns {string} The doc ID using `/` separators.
 */
function getDocId(relPath) {
  const withoutExt = relPath.replace(/\.(md|mdx)$/, '');

  if (path.basename(withoutExt) !== 'index') {
    return withoutExt.replace(/\\/g, '/');
  }

  const parentDir = path.dirname(withoutExt);
  const isRootIndex = parentDir === '.' || parentDir === '';
  const docId = isRootIndex ? 'index' : parentDir;

  return docId.replace(/\\/g, '/');
}
39+
40+
/**
 * Normalize a base URL so it starts and ends with `/`.
 *
 * A missing or empty value, or `/` itself, yields `/`.
 *
 * @param {string} [baseUrl] - Configured base URL, e.g. `docs`, `/docs` or `/docs/`.
 * @returns {string} The base path wrapped in slashes.
 */
function normalizeBasePath(baseUrl) {
  if (!baseUrl || baseUrl === '/') {
    return '/';
  }

  const isAlreadyWrapped = baseUrl.startsWith('/') && baseUrl.endsWith('/');
  if (isAlreadyWrapped) {
    return baseUrl;
  }

  // Drop at most one slash from each end, then wrap again.
  const trimmed = baseUrl.replace(/^\/|\/$/g, '');
  return `/${trimmed}/`;
}
49+
50+
/**
 * Resolve the directory that receives `llms.txt`: the parent of the build
 * output directory.
 *
 * @param {string} outDir - Build output directory passed to `postBuild`.
 * @returns {string} The parent directory of `outDir`.
 */
function getOutputDir(outDir) {
  const parentDir = path.dirname(outDir);
  return parentDir;
}
53+
54+
/**
 * Resolve `pathUrl` against `siteUrl`.
 *
 * @param {string} [siteUrl] - Site origin, e.g. `https://example.com`.
 * @param {string} pathUrl - Path (or URL) to resolve.
 * @returns {string} The absolute URL, or `pathUrl` unchanged when `siteUrl` is falsy.
 */
function toAbsoluteUrl(siteUrl, pathUrl) {
  return siteUrl ? new URL(pathUrl, siteUrl).href : pathUrl;
}
60+
61+
/**
 * Render the contents of `llms.txt`.
 *
 * The output is a header followed by four bullet lists: absolute HTML routes,
 * absolute Markdown mirror URLs, relative HTML routes and relative Markdown
 * mirror URLs. The text ends with a trailing newline.
 *
 * @param {object} params
 * @param {string} [params.siteUrl] - Site origin used to absolutize URLs.
 * @param {string} params.basePath - Base path of the site.
 * @param {string[]} params.htmlRelativeUrls - Relative HTML routes.
 * @param {string[]} params.markdownRelativeUrls - Relative Markdown mirror URLs.
 * @returns {string} The file contents.
 */
function buildLlmsTxt({
  siteUrl,
  basePath,
  htmlRelativeUrls,
  markdownRelativeUrls,
}) {
  const absolutize = (url) => toAbsoluteUrl(siteUrl, url);
  const asBullets = (urls) => urls.map((url) => `- ${url}`);

  const baseUrl = absolutize(basePath);
  const htmlAbsoluteUrls = htmlRelativeUrls.map((url) => absolutize(url));
  const markdownAbsoluteUrls = markdownRelativeUrls.map((url) =>
    absolutize(url),
  );

  const lines = [
    '# Semgrep Docs',
    '# This file lists documentation pages for AI tooling.',
    `# Base URL: ${baseUrl}`,
  ].concat(
    ['# Absolute HTML routes:'],
    asBullets(htmlAbsoluteUrls),
    ['# Absolute Markdown mirror:'],
    asBullets(markdownAbsoluteUrls),
    ['# Relative HTML routes:'],
    asBullets(htmlRelativeUrls),
    ['# Relative Markdown mirror:'],
    asBullets(markdownRelativeUrls),
  );

  // Terminate the last line so the file ends with a newline.
  return `${lines.join('\n')}\n`;
}
90+
91+
module.exports = function llmsTxtPlugin(context, options) {
92+
return {
93+
name: 'llms-txt',
94+
async postBuild({outDir}) {
95+
const docsDir = path.resolve(context.siteDir, 'docs');
96+
const markdownFiles = walk(docsDir);
97+
const basePath = normalizeBasePath(context.siteConfig.baseUrl);
98+
const docIds = markdownFiles
99+
.map((file) => getDocId(path.relative(docsDir, file)))
100+
.filter(Boolean)
101+
.sort();
102+
103+
const htmlRelativeUrls = docIds.map((docId) =>
104+
docId === 'index' ? basePath : `${basePath}${docId}`,
105+
);
106+
const markdownRelativeUrls = docIds.map(
107+
(docId) => `${basePath}markdown/${docId}.md`,
108+
);
109+
110+
const llmsTxt = buildLlmsTxt({
111+
siteUrl: context.siteConfig.url,
112+
basePath,
113+
htmlRelativeUrls,
114+
markdownRelativeUrls,
115+
});
116+
const outputDir = getOutputDir(outDir);
117+
fs.mkdirSync(outputDir, {recursive: true});
118+
fs.writeFileSync(path.join(outputDir, 'llms.txt'), llmsTxt, 'utf-8');
119+
},
120+
};
121+
};

plugins/markdown-extract/index.js

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
const fs = require('node:fs');
2+
const path = require('node:path');
3+
4+
/**
 * Extract frontmatter from markdown content.
 *
 * Frontmatter is recognised only when the document starts with a `---` line
 * and is closed by a later line consisting solely of `---`. A `---` sequence
 * inside a value (e.g. `title: "A --- B"`) does not end the block.
 *
 * Only top-level-looking `title:` and `description:` lines are read; this is
 * a deliberately simple line parser, not a YAML parser.
 *
 * @param {string} markdown - Raw markdown source.
 * @returns {{title: (string|null), description: (string|null), content: string}}
 *   Parsed fields plus the markdown with the frontmatter block removed
 *   (leading whitespace trimmed). Without frontmatter, `content` is the input.
 */
function extractFrontmatter(markdown) {
  // Delimiters must sit on their own lines; the body group is optional so an
  // empty `---\n---` block is still recognised. Handles LF and CRLF.
  const frontmatterMatch = markdown.match(
    /^---[ \t]*\r?\n(?:([\s\S]*?)\r?\n)?---[ \t]*(?:\r?\n|$)/,
  );
  if (!frontmatterMatch) {
    return { title: null, description: null, content: markdown };
  }

  const fmContent = frontmatterMatch[1] ?? '';
  const contentWithoutFM = markdown.slice(frontmatterMatch[0].length).trimStart();

  // Simple frontmatter parse for title and description (YAML-ish)
  let title = null;
  let description = null;

  for (const line of fmContent.split(/\r?\n/)) {
    const [rawKey, ...rest] = line.split(':');
    if (!rawKey) continue;

    const key = rawKey.trim();
    // Re-join on ':' so values that themselves contain colons stay intact,
    // then strip one surrounding quote character from each end.
    const value = rest.join(':').trim().replace(/^["']|["']$/g, '');

    if (key === 'title') title = value;
    if (key === 'description') description = value;
  }

  return { title, description, content: contentWithoutFM };
}
32+
33+
/**
 * Recursively walk a directory and collect markdown files.
 *
 * Files whose name starts with `_` are skipped, and only files whose
 * extension is listed in `extFilter` are returned. A directory that does not
 * exist contributes nothing.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [extFilter] - Extensions (with leading dot) to keep.
 * @returns {string[]} Paths of matching files, each built by joining onto `dir`.
 */
function walk(dir, extFilter = ['.md', '.mdx']) {
  const found = [];

  const visit = (current) => {
    if (!fs.existsSync(current)) {
      return;
    }

    for (const entry of fs.readdirSync(current, { withFileTypes: true })) {
      const entryPath = path.join(current, entry.name);

      if (entry.isDirectory()) {
        visit(entryPath);
        continue;
      }
      if (entry.name.startsWith('_')) {
        continue;
      }
      if (extFilter.includes(path.extname(entry.name))) {
        found.push(entryPath);
      }
    }
  };

  visit(dir);
  return found;
}
58+
59+
/**
 * Process markdown file: remove frontmatter only (keep original content).
 *
 * Frontmatter is recognised only when the file starts with a `---` line and
 * is closed by a later line consisting solely of `---`, so a `---` sequence
 * inside a frontmatter value does not cut the block short and leak metadata
 * into the output.
 *
 * @param {string} filePath - Path of the markdown file to read (UTF-8).
 * @returns {string} File content without the frontmatter block (leading
 *   whitespace trimmed), or the unchanged content when there is none.
 */
function processMarkdownFile(filePath) {
  const content = fs.readFileSync(filePath, 'utf-8');

  // Delimiters must sit on their own lines; the body group is optional so an
  // empty `---\n---` block is still recognised. Handles LF and CRLF.
  const frontmatterMatch = content.match(
    /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/,
  );

  // No frontmatter, return as-is
  if (!frontmatterMatch) {
    return content;
  }

  // If there's frontmatter, remove it and return the rest
  return content.slice(frontmatterMatch[0].length).trimStart();
}
74+
75+
/**
 * Convert a docs-relative file path to a URL path (handles index files).
 *
 * One leading slash is dropped, a trailing `/index.md` or `/index.mdx`
 * collapses to `.md`, and a remaining `.mdx` extension becomes `.md`.
 *
 * @param {string} relPath - File path relative to the docs directory.
 * @param {string} [baseUrl] - Prefix prepended to the rewritten path.
 * @returns {string} `baseUrl` followed by the rewritten path.
 */
function rewritePath(relPath, baseUrl = '/docs/') {
  const urlPath = relPath
    // Remove leading slash if present
    .replace(/^\//, '')
    // Handle index files - convert path/index.md to path.md
    .replace(/\/index\.(md|mdx)$/, '.md')
    // Ensure .md extension
    .replace(/\.mdx$/, '.md');

  return `${baseUrl}${urlPath}`;
}
94+
95+
/**
 * Get the path of a file relative to the docs directory.
 *
 * @param {string} filePath - Path of the file.
 * @param {string} docsDir - Docs directory the result is relative to.
 * @returns {string} `filePath` expressed relative to `docsDir`.
 */
function getRelativePath(filePath, docsDir) {
  const relativeToDocs = path.relative(docsDir, filePath);
  return relativeToDocs;
}
101+
102+
/**
 * Copy original markdown files (frontmatter removed) to the output directory.
 *
 * Each file under `<siteDir>/docs` is written to `<outputDir>/<docId>.md`,
 * where the doc ID is the docs-relative path without its extension and an
 * `index` file maps to its parent directory. A failure on one file is logged
 * as a warning and does not stop the remaining files.
 *
 * @param {object} context - Plugin context; only `siteDir` is read.
 * @param {string} outputDir - Directory that receives the markdown copies.
 * @returns {void}
 */
function copyMarkdownFiles(context, outputDir) {
  console.log('Copying markdown files for copy-to-markdown feature...');

  const docsDir = path.resolve(context.siteDir, 'docs');
  const contentFiles = walk(docsDir);

  // Create markdown output directory
  fs.mkdirSync(outputDir, { recursive: true });

  // Map a docs-relative file path to its doc ID, using `/` separators.
  const toDocId = (relPath) => {
    const withoutExt = relPath.replace(/\.(md|mdx)$/, '');
    if (path.basename(withoutExt) !== 'index') {
      return withoutExt.replace(/\\/g, '/');
    }

    // Index files map to their parent directory; the root index stays 'index'.
    const parentDir = path.dirname(withoutExt);
    const id = parentDir === '.' || parentDir === '' ? 'index' : parentDir;
    return id.replace(/\\/g, '/');
  };

  let processedCount = 0;

  contentFiles.forEach((file) => {
    try {
      const relPath = getRelativePath(file, docsDir);
      const markdownBody = processMarkdownFile(file);

      // e.g. markdown/getting-started/introduction.md
      const outputPath = path.join(outputDir, toDocId(relPath) + '.md');

      fs.mkdirSync(path.dirname(outputPath), { recursive: true });
      fs.writeFileSync(outputPath, markdownBody, 'utf-8');
      processedCount += 1;
    } catch (error) {
      console.warn(`Error processing ${file}:`, error.message);
    }
  });

  console.log(`Copied ${processedCount} markdown files to ${outputDir}`);
}
156+
157+
module.exports = function markdownExtractPlugin(context, options) {
158+
return {
159+
name: 'markdown-extract',
160+
async postBuild({ outDir, routes }) {
161+
// Only copy to build directory for production (not to static to save space)
162+
// Files are only generated during build, not in static directory
163+
const markdownOutputDir = path.join(outDir, 'markdown');
164+
copyMarkdownFiles(context, markdownOutputDir);
165+
},
166+
};
167+
};
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import React, { useState } from 'react';
2+
import { useDoc } from '@docusaurus/plugin-content-docs/client';
3+
import styles from './styles.module.css';
4+
5+
/**
 * Button that copies the current doc page's Markdown source to the clipboard.
 *
 * The Markdown is fetched from `/docs/markdown/<doc id>.md`. The button label
 * switches to "Copied!" or "Failed" and resets after two seconds; the button
 * is disabled while a copy is in flight.
 *
 * Returns `null` when the doc has no ID.
 */
export default function CopyToMarkdownButton(): JSX.Element | null {
  // Since this component is only used in DocItem/Content, we're always in a doc context
  const {metadata} = useDoc();
  const [buttonText, setButtonText] = useState('Copy as md');
  const [isLoading, setIsLoading] = useState(false);
  // Pending label-reset timer, kept so it can be cancelled on unmount.
  const resetTimer = React.useRef<ReturnType<typeof setTimeout> | null>(null);

  // Avoid updating state after the component has been unmounted.
  React.useEffect(
    () => () => {
      if (resetTimer.current !== null) {
        clearTimeout(resetTimer.current);
      }
    },
    [],
  );

  // Only show if we have a valid doc ID
  if (!metadata?.id) {
    return null;
  }

  // Construct the markdown file path using the doc ID
  // Example: metadata.id = "getting-started/introduction" -> /docs/markdown/getting-started/introduction.md
  const getMarkdownPath = () => {
    const docId = metadata.id;

    // Handle index/homepage
    if (docId === 'Docs home' || docId === 'index' || !docId) {
      return '/docs/markdown/index.md';
    }

    // Add .md extension
    return `/docs/markdown/${docId}.md`;
  };

  // Download the Markdown source; rejects on a non-2xx response.
  const fetchMarkdown = async (): Promise<string> => {
    const response = await fetch(getMarkdownPath());
    if (!response.ok) {
      throw new Error(`Failed to fetch markdown: ${response.status}`);
    }
    return response.text();
  };

  const handleCopy = async () => {
    setIsLoading(true);
    const resetTextMS = 2000;
    const btnText = 'Copy as md';

    try {
      if (typeof ClipboardItem !== 'undefined' && navigator.clipboard?.write) {
        // Call clipboard.write() synchronously inside the click handler and
        // hand it a promise for the data. Awaiting the fetch first would move
        // the write outside the user gesture, which some browsers reject.
        const clipboardItem = new ClipboardItem({
          'text/plain': fetchMarkdown().then(
            (text) => new Blob([text], {type: 'text/plain'}),
          ),
        });
        await navigator.clipboard.write([clipboardItem]);
      } else {
        // Fallback for browsers without ClipboardItem support.
        await navigator.clipboard.writeText(await fetchMarkdown());
      }
      setButtonText('Copied!');
    } catch (err) {
      console.error('Error copying markdown:', err);
      setButtonText('Failed');
    } finally {
      // Restore the label and re-enable the button on success and failure.
      resetTimer.current = setTimeout(() => {
        resetTimer.current = null;
        setButtonText(btnText);
        setIsLoading(false);
      }, resetTextMS);
    }
  };

  return (
    <button
      id="copy-docs-markdown-llm-button"
      className={styles.copyButton}
      onClick={handleCopy}
      disabled={isLoading}
      type="button"
    >
      {buttonText}
    </button>
  );
}

0 commit comments

Comments
 (0)