apache
diff --git a/‎gulp/helpers/html-index.js‎
Lines changed: 164 additions & 0 deletions b/‎gulp/helpers/html-index.js‎
Lines changed: 164 additions & 0 deletions
diff --git a/‎gulp/helpers/rss-feed.js‎
Lines changed: 106 additions & 0 deletions b/‎gulp/helpers/rss-feed.js‎
Lines changed: 106 additions & 0 deletions
diff --git a/‎gulp/helpers/toon-format.js‎
Lines changed: 90 additions & 0 deletions b/‎gulp/helpers/toon-format.js‎
Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,164 @@
+const fs = require('fs');
+const { parse } = require('node-html-parser');
+const { createTurndownService } = require('./turndown-config');
+
+/**
+ * Points Markdown links at the generated `.md` pages instead of their `.html` sources.
+ *
+ * Absolute https://camel.apache.org/ URLs are rewritten wherever they appear in the text.
+ * Markdown links of the form `[text](target.html)` are rewritten too, including an
+ * optional `#fragment` or `?query` suffix, but only when the target is relative or lives
+ * on camel.apache.org. Links to any other site are left untouched, because this build
+ * only writes `.md` files for its own pages.
+ *
+ * @param {string} markdown - Markdown produced from an HTML index page
+ * @returns {string} The Markdown with site-internal `.html` links replaced by `.md`
+ */
+function rewriteHtmlLinksToMarkdown(markdown) {
+  const withAbsoluteLinks = markdown.replace(
+    /(https:\/\/camel\.apache\.org\/[^)\s]*?)\.html/g,
+    '$1.md'
+  );
+
+  return withAbsoluteLinks.replace(
+    /\[([^\]]+)\]\(([^)]+?)\.html([#?][^)]*)?\)/g,
+    (match, text, target, suffix) => {
+      // A scheme ("https:", "mailto:") or a protocol-relative "//" marks an absolute URL.
+      const isAbsolute = /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i.test(target);
+      const isCamelSite = /^(?:https?:)?\/\/camel\.apache\.org\//i.test(target);
+      if (isAbsolute && !isCamelSite) {
+        return match;
+      }
+      return `[${text}](${target}.md${suffix || ''})`;
+    }
+  );
+}
+
+/**
+ * Generates a Markdown companion for an HTML index page.
+ *
+ * The Markdown is written next to the source file, with the `.html` extension replaced
+ * by `.md` (e.g. 'public/components/next/index.html' -> 'public/components/next/index.md').
+ * Nothing is written when the HTML file does not exist or contains no recognizable main
+ * content element. Any error is logged to the console and not rethrown.
+ *
+ * @param {Object} config - Configuration object
+ * @param {string} config.htmlPath - Path to the HTML file (e.g., 'public/components/next/index.html')
+ * @param {string} config.title - Title for the markdown file (e.g., 'Components Index')
+ * @param {string} config.description - Description text (e.g., 'List of all Camel components')
+ * @returns {Promise<void>}
+ */
+async function generateHtmlIndex(config) {
+  const { htmlPath, title, description } = config;
+  const mdPath = htmlPath.replace(/\.html$/, '.md');
+
+  try {
+    // Missing pages are skipped silently
+    if (!fs.existsSync(htmlPath)) {
+      return;
+    }
+
+    const root = parse(fs.readFileSync(htmlPath, 'utf8'));
+    const turndownService = createTurndownService();
+
+    // Keep only the main article content; the most specific selector is tried first
+    const mainContent = root.querySelector('article.doc') ||
+      root.querySelector('main') ||
+      root.querySelector('.article') ||
+      root.querySelector('article');
+
+    if (!mainContent) {
+      return;
+    }
+
+    // Remove navigation elements, then the anchor links
+    mainContent
+      .querySelectorAll('nav, header, footer, .nav, .navbar, .toolbar')
+      .forEach(el => el.remove());
+    mainContent.querySelectorAll('a.anchor').forEach(el => el.remove());
+
+    // Wrapper markup stripped from table cells, applied in this order:
+    //   1. <div class="content"><div class="paragraph"><p>...</p></div></div>
+    //   2. the same with extra attributes on the inner div (e.g. an id)
+    //   3. a plain <p class="tableblock">...</p>
+    const cellWrappers = [
+      /<div class="content"><div class="paragraph">\s*<p>(.*?)<\/p>\s*<\/div><\/div>/gs,
+      /<div class="content"><div[^>]*class="paragraph"[^>]*>\s*<p>(.*?)<\/p>\s*<\/div><\/div>/gs,
+      /<p class="tableblock">(.*?)<\/p>/gs
+    ];
+    mainContent.querySelectorAll('td.tableblock, th.tableblock').forEach(cell => {
+      const unwrapped = cellWrappers.reduce(
+        (html, wrapper) => html.replace(wrapper, '$1'),
+        cell.innerHTML
+      );
+      cell.set_content(unwrapped);
+    });
+
+    // Convert to Markdown and retarget site-internal links at the .md pages
+    const markdown = rewriteHtmlLinksToMarkdown(
+      turndownService.turndown(mainContent.innerHTML)
+    );
+
+    // The header is only added when both title and description are provided
+    const header = title && description ? `# ${title}\n\n${description}\n\n` : '';
+
+    fs.writeFileSync(mdPath, `${header}${markdown}`, 'utf8');
+  } catch (error) {
+    console.error(`Error generating markdown for ${htmlPath}:`, error.message);
+  }
+}
+
+/**
+ * Runs generateHtmlIndex over a fixed list of HTML index pages.
+ * Pages are processed one at a time, in the order they are listed below.
+ */
+async function generateAllIndexes() {
+  console.log('\nGenerating markdown for all index files...');
+
+  // One row per page: [htmlPath, title, description]
+  const indexPages = [
+    ['public/camel-k/next/index.html', 'Camel K Documentation Index', 'Index of Camel K documentation pages.'],
+    ['public/camel-kafka-connector/next/index.html', 'Camel Kafka Connector Documentation Index', 'Index of Camel Kafka Connector documentation pages.'],
+    ['public/camel-kamelets/next/index.html', 'Camel Kamelets Documentation Index', 'Index of Camel Kamelets documentation pages.'],
+    ['public/camel-quarkus/next/index.html', 'Camel Quarkus Documentation Index', 'Index of Camel Quarkus documentation pages.'],
+    ['public/camel-spring-boot/next/index.html', 'Camel Spring Boot Documentation Index', 'Index of Camel Spring Boot documentation pages.'],
+    ['public/components/next/index.html', 'Components Index', 'Index of all Camel components.'],
+    ['public/components/next/others/index.html', 'Other Components Index', 'Index of other Camel components.'],
+    ['public/components/next/languages/index.html', 'Languages Index', 'Index of Camel expression and predicate languages.'],
+    ['public/components/next/eips/index.html', 'Enterprise Integration Patterns Index', 'Index of Enterprise Integration Patterns (EIPs).'],
+    ['public/components/next/dataformats/index.html', 'Data Formats Index', 'Index of Camel data formats.'],
+    ['public/manual/index.html', 'User Manual Index', 'Index of Apache Camel user manual pages.'],
+    ['public/manual/faq/index.html', 'FAQ Index', 'Frequently Asked Questions about Apache Camel.']
+  ];
+
+  for (const [htmlPath, title, description] of indexPages) {
+    await generateHtmlIndex({ htmlPath, title, description });
+  }
+
+  console.log('All index files generation complete');
+}
+
+module.exports = {
+  generateHtmlIndex,
+  generateAllIndexes
+};
@@ -0,0 +1,106 @@
+const fs = require('fs');
+const xml2js = require('xml2js');
+
+/**
+ * Returns the text of the first occurrence of an xml2js-parsed element as a single line.
+ *
+ * Handles the two shapes xml2js produces for an element: a plain string, or an object
+ * of the form { _: text, $: attributes } when the element carries attributes. Every run
+ * of whitespace (including CR/LF) is collapsed to one space so the value cannot break
+ * the one-row-per-item layout.
+ *
+ * @param {Array|string|Object|undefined} node - Value found on the parsed item (e.g. item.title)
+ * @returns {string} The flattened text, or '' when the element is absent
+ */
+function rssFieldText(node) {
+  const first = Array.isArray(node) ? node[0] : node;
+  if (first === undefined || first === null) {
+    return '';
+  }
+  const text = typeof first === 'object' ? first._ || '' : first;
+  return String(text).replace(/\s+/g, ' ').trim();
+}
+
+/**
+ * Makes a value safe to place in a pipe-delimited row.
+ * Values containing the delimiter, a double quote or a backslash are wrapped in double
+ * quotes with `\` and `"` backslash-escaped; all other values are returned unchanged.
+ *
+ * @param {string} value - Single-line cell text
+ * @returns {string} The cell as it should appear in the row
+ */
+function toonPipeCell(value) {
+  if (!/[|"\\]/.test(value)) {
+    return value;
+  }
+  return `"${value.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
+}
+
+/**
+ * Generic function to generate a toon-style markdown listing from an RSS XML feed.
+ *
+ * The output is written next to the feed, with the `.xml` extension replaced by `.md`
+ * (e.g. 'public/blog/index.xml' -> 'public/blog/index.md'). It consists of a title, the
+ * description, a header line and one pipe-delimited row per feed item
+ * (title|link|pubDate|description). Item links are pointed at the Markdown pages:
+ * a trailing '/' gets 'index.md' appended, a trailing '.html' becomes '.md'.
+ * Descriptions are truncated to 200 characters.
+ *
+ * Nothing is written when the XML file does not exist. When the XML is not an RSS feed
+ * an empty file is written. Any error is logged to the console and not rethrown.
+ *
+ * @param {Object} config - Configuration object
+ * @param {string} config.xmlPath - Path to the XML file (e.g., 'public/blog/index.xml')
+ * @param {string} config.title - Title for the markdown file (e.g., 'Apache Camel Blog')
+ * @param {string} config.description - Description text (e.g., 'Blog posts about Apache Camel')
+ * @param {string} config.itemsName - Name for the items collection (e.g., 'posts', 'releases')
+ * @returns {Promise<void>}
+ */
+async function generateRssFeedIndex(config) {
+  const parser = new xml2js.Parser();
+
+  const { xmlPath, title, description, itemsName } = config;
+  const mdPath = xmlPath.replace(/\.xml$/, '.md');
+
+  try {
+    // Missing feeds are skipped silently
+    if (!fs.existsSync(xmlPath)) {
+      return;
+    }
+
+    const result = await parser.parseStringPromise(fs.readFileSync(xmlPath, 'utf8'));
+
+    let toonContent = '';
+
+    // Only RSS feeds (rss > channel) are converted
+    const channel = result.rss && result.rss.channel && result.rss.channel[0];
+    if (channel) {
+      const items = channel.item || [];
+
+      const rows = items.map(item => {
+        const link = rssFieldText(item.link);
+        const mdLink = link.endsWith('/')
+          ? `${link}index.md`
+          : link.replace(/\.html$/, '.md');
+
+        const cells = [
+          rssFieldText(item.title),
+          mdLink,
+          rssFieldText(item.pubDate),
+          rssFieldText(item.description).substring(0, 200)
+        ];
+        return `  ${cells.map(toonPipeCell).join('|')}\n`;
+      });
+
+      // NOTE(review): the header lists the fields with commas while the rows are
+      // pipe-delimited; confirm against the toon specification whether the header
+      // should declare the pipe delimiter. Left unchanged to keep the output stable.
+      toonContent = `# ${title}\n\n` +
+        `${description}\n\n` +
+        `${itemsName}[${items.length}]{title,link,pubDate,description}:\n` +
+        rows.join('');
+    }
+
+    fs.writeFileSync(mdPath, toonContent, 'utf8');
+  } catch (error) {
+    console.error(`Error generating toon format for ${xmlPath}:`, error.message);
+  }
+}
+
+/**
+ * Builds the toon-style listing for the Releases category RSS feed.
+ * Delegates to generateRssFeedIndex with the feed at
+ * public/categories/Releases/index.xml and logs before and after the conversion.
+ */
+async function generateReleasesIndex() {
+  const releasesFeed = {
+    xmlPath: 'public/categories/Releases/index.xml',
+    title: 'Apache Camel Releases',
+    description: 'Release feed for Apache Camel and related projects.',
+    itemsName: 'releases'
+  };
+
+  console.log('\nGenerating toon format for Releases index...');
+  await generateRssFeedIndex(releasesFeed);
+  console.log('Releases index toon format generation complete');
+}
+
+/**
+ * Builds the toon-style listing for the Blog RSS feed.
+ * Delegates to generateRssFeedIndex with the feed at public/blog/index.xml
+ * and logs before and after the conversion.
+ */
+async function generateBlogIndex() {
+  const blogFeed = {
+    xmlPath: 'public/blog/index.xml',
+    title: 'Apache Camel Blog',
+    description: 'Blog posts about Apache Camel and related topics.',
+    itemsName: 'posts'
+  };
+
+  console.log('\nGenerating toon format for Blog index...');
+  await generateRssFeedIndex(blogFeed);
+  console.log('Blog index toon format generation complete');
+}
+
+module.exports = {
+  generateRssFeedIndex,
+  generateReleasesIndex,
+  generateBlogIndex
+};
@@ -0,0 +1,90 @@
+const fs = require('fs');
+const path = require('path');
+const xml2js = require('xml2js');
+
+/**
+ * Generates toon format sitemaps from XML sitemaps.
+ *
+ * Every public/sitemap*.xml file is converted to a sibling file with the `.xml`
+ * extension replaced by `.md`:
+ *  - a sitemap index becomes a `sitemaps[N]{loc}:` list whose entries point at the
+ *    `.md` version of each referenced sitemap;
+ *  - a URL set becomes a `urls[N]{loc,lastmod}:` list in which a trailing `.html`
+ *    in each location is replaced by `.md`.
+ * A file that is neither of the two produces an empty `.md` file.
+ *
+ * A failure on one file is logged and does not stop the remaining files. The final
+ * log line reports how many files were actually written.
+ *
+ * @returns {Promise<void>}
+ */
+async function generateToonSitemaps() {
+  const parser = new xml2js.Parser();
+  const glob = require('glob');
+
+  console.log('\nGenerating toon format sitemaps...');
+
+  // Find all sitemap*.xml files in the public directory
+  const sitemapFiles = glob.sync('public/sitemap*.xml');
+
+  if (sitemapFiles.length === 0) {
+    return;
+  }
+
+  // Counts successful conversions only, so the summary stays accurate after errors
+  let convertedCount = 0;
+
+  for (const xmlPath of sitemapFiles) {
+    const sitemapFile = path.basename(xmlPath);
+    const toonPath = xmlPath.replace(/\.xml$/, '.md');
+
+    try {
+      const result = await parser.parseStringPromise(fs.readFileSync(xmlPath, 'utf8'));
+
+      let toonContent = '';
+
+      if (result.sitemapindex) {
+        // Sitemap index (sitemap.xml): list the .md version of each child sitemap
+        const sitemaps = result.sitemapindex.sitemap || [];
+        const rows = sitemaps.map(sitemap => {
+          const loc = sitemap.loc ? sitemap.loc[0] : '';
+          return `  ${loc.replace(/\.xml$/, '.md')}\n`;
+        });
+        toonContent = `sitemaps[${sitemaps.length}]{loc}:\n${rows.join('')}`;
+      } else if (result.urlset) {
+        // Regular sitemap: list each URL (as .md) with its last modification date
+        const urls = result.urlset.url || [];
+        const rows = urls.map(url => {
+          const loc = url.loc ? url.loc[0] : '';
+          const lastmod = url.lastmod ? url.lastmod[0] : '';
+          return `  ${loc.replace(/\.html$/, '.md')},${lastmod}\n`;
+        });
+        toonContent = `urls[${urls.length}]{loc,lastmod}:\n${rows.join('')}`;
+      }
+
+      fs.writeFileSync(toonPath, toonContent, 'utf8');
+      convertedCount += 1;
+      // Report the name of the file that was actually written
+      console.log(`Generated ${path.basename(toonPath)}`);
+    } catch (error) {
+      console.error(`Error generating toon sitemap for ${sitemapFile}:`, error.message);
+    }
+  }
+
+  console.log(`Toon format sitemaps generation complete - ${convertedCount} files converted`);
+}
+
+/**
+ * Writes public/llms.txt (see https://llmstxt.org/) from the project template.
+ * The content of llms-txt-template.md, resolved two directories above this module,
+ * is copied verbatim into the output file.
+ *
+ * @param {Array<string>} pages - Array of page URLs that were converted to markdown;
+ *   accepted for callers but not read by this function
+ */
+function generateLlmsTxt(pages) {
+  const templateFile = path.join(__dirname, '../../llms-txt-template.md');
+  const templateText = fs.readFileSync(templateFile, 'utf8');
+
+  fs.writeFileSync('public/llms.txt', templateText, 'utf8');
+  console.log('Generated /llms.txt');
+}
+
+module.exports = {
+  generateToonSitemaps,
+  generateLlmsTxt
+};