[TEC-386] Click to copy page as md (#2491)

abhijna · web-flow · commit 580fd5731ad0 · 2026-02-18T09:44:19.000-08:00
* click to copy as md

* llm.txt

* take out extra /docs from llms.txt paths

* /docs/llms.txt and /llms.txt

* redirect
diff --git a/docusaurus.config.js b/docusaurus.config.js
@@ -268,6 +268,8 @@ module.exports = {
   ],
   plugins: [
     'docusaurus-plugin-sass',
+    './plugins/markdown-extract',
+    './plugins/llms-txt',
     [
       '@docusaurus/plugin-client-redirects',
       {
diff --git a/netlify.toml b/netlify.toml
@@ -18,6 +18,11 @@ from = "/"
 to = "/docs"
 status = 302
 
+[[redirects]]
+from = "/docs/llms.txt"
+to = "/llms.txt"
+status = 301
+
 [[redirects]]
 from = "/*"
 to = "/docs/404.html"
diff --git a/plugins/llms-txt/index.js b/plugins/llms-txt/index.js
@@ -0,0 +1,121 @@
+const fs = require('node:fs');
+const path = require('node:path');
+
+function walk(dir, extFilter = ['.md', '.mdx']) {
+  const results = [];
+
+  if (!fs.existsSync(dir)) {
+    return results;
+  }
+
+  for (const entry of fs.readdirSync(dir, {withFileTypes: true})) {
+    const fullPath = path.join(dir, entry.name);
+
+    if (entry.isDirectory()) {
+      results.push(...walk(fullPath, extFilter));
+    } else if (
+      !entry.name.startsWith('_') &&
+      extFilter.includes(path.extname(entry.name))
+    ) {
+      results.push(fullPath);
+    }
+  }
+
+  return results;
+}
+
+function getDocId(relPath) {
+  let docId = relPath.replace(/\.(md|mdx)$/, '');
+
+  if (path.basename(docId) === 'index') {
+    docId = path.dirname(docId);
+    if (docId === '.' || docId === '') {
+      docId = 'index';
+    }
+  }
+
+  return docId.replace(/\\/g, '/');
+}
+
+function normalizeBasePath(baseUrl) {
+  const basePath = baseUrl || '/';
+  if (basePath === '/') {
+    return '/';
+  }
+  return basePath.startsWith('/') && basePath.endsWith('/')
+    ? basePath
+    : `/${basePath.replace(/^\/|\/$/g, '')}/`;
+}
+
+function getOutputDir(outDir) {
+  return path.dirname(outDir);
+}
+
+function toAbsoluteUrl(siteUrl, pathUrl) {
+  if (!siteUrl) {
+    return pathUrl;
+  }
+  return new URL(pathUrl, siteUrl).toString();
+}
+
+function buildLlmsTxt({
+  siteUrl,
+  basePath,
+  htmlRelativeUrls,
+  markdownRelativeUrls,
+}) {
+  const baseUrl = toAbsoluteUrl(siteUrl, basePath);
+  const htmlAbsoluteUrls = htmlRelativeUrls.map((url) =>
+    toAbsoluteUrl(siteUrl, url),
+  );
+  const markdownAbsoluteUrls = markdownRelativeUrls.map((url) =>
+    toAbsoluteUrl(siteUrl, url),
+  );
+
+  return [
+    '# Semgrep Docs',
+    '# This file lists documentation pages for AI tooling.',
+    `# Base URL: ${baseUrl}`,
+    '# Absolute HTML routes:',
+    ...htmlAbsoluteUrls.map((url) => `- ${url}`),
+    '# Absolute Markdown mirror:',
+    ...markdownAbsoluteUrls.map((url) => `- ${url}`),
+    '# Relative HTML routes:',
+    ...htmlRelativeUrls.map((url) => `- ${url}`),
+    '# Relative Markdown mirror:',
+    ...markdownRelativeUrls.map((url) => `- ${url}`),
+    '',
+  ].join('\n');
+}
+
+module.exports = function llmsTxtPlugin(context, options) {
+  return {
+    name: 'llms-txt',
+    async postBuild({outDir}) {
+      const docsDir = path.resolve(context.siteDir, 'docs');
+      const markdownFiles = walk(docsDir);
+      const basePath = normalizeBasePath(context.siteConfig.baseUrl);
+      const docIds = markdownFiles
+        .map((file) => getDocId(path.relative(docsDir, file)))
+        .filter(Boolean)
+        .sort();
+
+      const htmlRelativeUrls = docIds.map((docId) =>
+        docId === 'index' ? basePath : `${basePath}${docId}`,
+      );
+      const markdownRelativeUrls = docIds.map(
+        (docId) => `${basePath}markdown/${docId}.md`,
+      );
+
+      const llmsTxt = buildLlmsTxt({
+        siteUrl: context.siteConfig.url,
+        basePath,
+        htmlRelativeUrls,
+        markdownRelativeUrls,
+      });
+      const outputDir = getOutputDir(outDir);
+      fs.mkdirSync(outputDir, {recursive: true});
+      fs.writeFileSync(path.join(outputDir, 'llms.txt'), llmsTxt, 'utf-8');
+    },
+  };
+};
diff --git a/plugins/markdown-extract/index.js b/plugins/markdown-extract/index.js
@@ -0,0 +1,167 @@
+const fs = require('node:fs');
+const path = require('node:path');
+
+/**
+ * Extract frontmatter from markdown content
+ */
+function extractFrontmatter(markdown) {
+  const frontmatterMatch = markdown.match(/^---([\s\S]*?)---/);
+  if (!frontmatterMatch) {
+    return { title: null, description: null, content: markdown };
+  }
+
+  const fmContent = frontmatterMatch[1];
+  const contentWithoutFM = markdown.slice(frontmatterMatch[0].length).trimStart();
+
+  // Simple frontmatter parse for title and description (YAML-ish)
+  const lines = fmContent.split(/\r?\n/);
+  let title = null;
+  let description = null;
+
+  for (const line of lines) {
+    const [key, ...rest] = line.split(':');
+    if (!key) continue;
+    const value = rest.join(':').trim().replace(/^["']|["']$/g, ''); // Remove quotes
+
+    if (key.trim() === 'title') title = value;
+    if (key.trim() === 'description') description = value;
+  }
+
+  return { title, description, content: contentWithoutFM };
+}
+
+/**
+ * Recursively walk directory and find markdown files
+ */
+function walk(dir, extFilter = ['.md', '.mdx']) {
+  const results = [];
+
+  if (!fs.existsSync(dir)) {
+    return results;
+  }
+
+  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+    const fullPath = path.join(dir, entry.name);
+
+    if (entry.isDirectory()) {
+      results.push(...walk(fullPath, extFilter));
+    } else if (
+      !entry.name.startsWith('_') &&
+      extFilter.includes(path.extname(entry.name))
+    ) {
+      results.push(fullPath);
+    }
+  }
+
+  return results;
+}
+
+/**
+ * Process markdown file: remove frontmatter only (keep original content)
+ */
+function processMarkdownFile(filePath) {
+  let content = fs.readFileSync(filePath, 'utf-8');
+  const frontmatterMatch = content.match(/^---([\s\S]*?)---/);
+  
+  // If there's frontmatter, remove it and return the rest
+  if (frontmatterMatch) {
+    return content.slice(frontmatterMatch[0].length).trimStart();
+  }
+  
+  // No frontmatter, return as-is
+  return content;
+}
+
+/**
+ * Convert file path to URL path (handles index files)
+ */
+function rewritePath(relPath, baseUrl = '/docs/') {
+  // Remove leading slash if present
+  let path = relPath.replace(/^\//, '');
+  
+  // Handle index files - convert /path/index.md to /path.md
+  if (path.endsWith('/index.md') || path.endsWith('/index.mdx')) {
+    path = path.replace(/\/index\.(md|mdx)$/, '.md');
+  }
+  
+  // Ensure .md extension
+  if (!path.endsWith('.md')) {
+    path = path.replace(/\.mdx$/, '.md');
+  }
+  
+  return baseUrl + path;
+}
+
+/**
+ * Get the relative path from docs directory
+ */
+function getRelativePath(filePath, docsDir) {
+  return path.relative(docsDir, filePath);
+}
+
+/**
+ * Copy original markdown files (just remove frontmatter) to output directory
+ * Files are organized by their doc ID (file path without extension)
+ */
+function copyMarkdownFiles(context, outputDir) {
+  console.log('Copying markdown files for copy-to-markdown feature...');
+  
+  const docsDir = path.resolve(context.siteDir, 'docs');
+  const contentFiles = walk(docsDir);
+  
+  // Create markdown output directory
+  fs.mkdirSync(outputDir, { recursive: true });
+
+  let processedCount = 0;
+
+  for (const file of contentFiles) {
+    try {
+      const relPath = getRelativePath(file, docsDir);
+      // Remove frontmatter but keep original content
+      const processedContent = processMarkdownFile(file);
+      
+      // Use the file path as the doc ID (remove extension)
+      // This matches what metadata.id returns
+      let docId = relPath.replace(/\.(md|mdx)$/, '');
+      
+      // Handle index files - they map to their parent directory
+      // e.g., getting-started/index.md -> getting-started
+      if (path.basename(docId) === 'index') {
+        docId = path.dirname(docId);
+        // If it's the root index, keep it as 'index'
+        if (docId === '.' || docId === '') {
+          docId = 'index';
+        }
+      }
+      
+      // Normalize path separators for cross-platform compatibility
+      docId = docId.replace(/\\/g, '/');
+      
+      // Create output path: markdown/getting-started/introduction.md
+      const outputPath = path.join(outputDir, docId + '.md');
+      
+      // Create directory structure
+      fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+      
+      // Write processed markdown (frontmatter removed, but original content preserved)
+      fs.writeFileSync(outputPath, processedContent, 'utf-8');
+      processedCount++;
+    } catch (error) {
+      console.warn(`Error processing ${file}:`, error.message);
+    }
+  }
+
+  console.log(`Copied ${processedCount} markdown files to ${outputDir}`);
+}
+
+module.exports = function markdownExtractPlugin(context, options) {
+  return {
+    name: 'markdown-extract',
+    async postBuild({ outDir, routes }) {
+      // Only copy to build directory for production (not to static to save space)
+      // Files are only generated during build, not in static directory
+      const markdownOutputDir = path.join(outDir, 'markdown');
+      copyMarkdownFiles(context, markdownOutputDir);
+    },
+  };
+};
diff --git a/src/components/CopyToMarkdownButton/index.tsx b/src/components/CopyToMarkdownButton/index.tsx
@@ -0,0 +1,79 @@
+import React, { useState } from 'react';
+import { useDoc } from '@docusaurus/plugin-content-docs/client';
+import styles from './styles.module.css';
+
+/**
+ * Button that copies the current doc page's Markdown source to the clipboard.
+ *
+ * The Markdown is fetched from `/docs/markdown/<doc id>.md`. The label shows
+ * `Copied!` or `Failed` for two seconds and then resets; the button is
+ * disabled during that time. Renders nothing when the doc has no id.
+ */
+export default function CopyToMarkdownButton(): JSX.Element | null {
+  const idleLabel = 'Copy as md';
+  const resetDelayMs = 2000;
+
+  // Since this component is only used in DocItem/Content, we're always in a doc context
+  const {metadata} = useDoc();
+  const [buttonText, setButtonText] = useState(idleLabel);
+  const [isLoading, setIsLoading] = useState(false);
+  const resetTimer = React.useRef<ReturnType<typeof setTimeout> | null>(null);
+
+  // Cancel a pending label reset if the button unmounts first.
+  React.useEffect(
+    () => () => {
+      if (resetTimer.current !== null) {
+        clearTimeout(resetTimer.current);
+      }
+    },
+    [],
+  );
+
+  // Only show if we have a valid doc ID
+  if (!metadata?.id) {
+    return null;
+  }
+
+  // Construct the markdown file path using the doc ID
+  // Example: metadata.id = "getting-started/introduction" -> /docs/markdown/getting-started/introduction.md
+  const getMarkdownPath = () => {
+    const docId = metadata.id;
+
+    // Handle index/homepage
+    if (docId === 'Docs home' || docId === 'index' || !docId) {
+      return '/docs/markdown/index.md';
+    }
+
+    // Add .md extension
+    return `/docs/markdown/${docId}.md`;
+  };
+
+  const fetchMarkdown = async (): Promise<string> => {
+    const response = await fetch(getMarkdownPath());
+    if (!response.ok) {
+      throw new Error(`Failed to fetch markdown: ${response.status}`);
+    }
+    return response.text();
+  };
+
+  const handleCopy = async () => {
+    setIsLoading(true);
+    let outcome = 'Copied!';
+
+    try {
+      if (typeof ClipboardItem === 'undefined') {
+        // No ClipboardItem support: fall back to the plain-text API.
+        await navigator.clipboard.writeText(await fetchMarkdown());
+      } else {
+        // Call clipboard.write() synchronously inside the click handler and
+        // hand it a promise for the data. Awaiting the fetch first would move
+        // the write out of the user gesture, which some browsers (notably
+        // Safari) reject.
+        const clipboardItem = new ClipboardItem({
+          'text/plain': fetchMarkdown().then(
+            (text) => new Blob([text], {type: 'text/plain'}),
+          ),
+        });
+        await navigator.clipboard.write([clipboardItem]);
+      }
+    } catch (err) {
+      console.error('Error copying markdown:', err);
+      outcome = 'Failed';
+    }
+
+    setButtonText(outcome);
+    // Single reset path for both success and failure.
+    resetTimer.current = setTimeout(() => {
+      resetTimer.current = null;
+      setButtonText(idleLabel);
+      setIsLoading(false);
+    }, resetDelayMs);
+  };
+
+  return (
+    <button
+      id="copy-docs-markdown-llm-button"
+      className={styles.copyButton}
+      onClick={handleCopy}
+      disabled={isLoading}
+      type="button"
+    >
+      {buttonText}
+    </button>
+  );
+}
diff --git a/src/components/CopyToMarkdownButton/styles.module.css b/src/components/CopyToMarkdownButton/styles.module.css
diff --git a/src/theme/DocItem/Content/index.tsx b/src/theme/DocItem/Content/index.tsx

Original file line number	Diff line number	Diff line change
`@@ -268,6 +268,8 @@ module.exports = {`
`268`	`268`	`],`
`269`	`269`	`plugins: [`
`270`	`270`	`'docusaurus-plugin-sass',`
	`271`	`+ './plugins/markdown-extract',`
	`272`	`+ './plugins/llms-txt',`
`271`	`273`	`[`
`272`	`274`	`'@docusaurus/plugin-client-redirects',`
`273`	`275`	`{`