|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Generate a sitemap.xml file for the mdBook site. |
| 4 | +This script should be run after the book is built. |
| 5 | +""" |
| 6 | + |
| 7 | +import os |
| 8 | +import datetime |
| 9 | +import xml.dom.minidom |
| 10 | +import xml.etree.ElementTree as ET |
| 11 | +from pathlib import Path |
| 12 | + |
# ---------------------------------------------------------------------------
# Sitemap settings
# ---------------------------------------------------------------------------

# Base URL of the published site (written without a trailing slash).
SITE_URL = "https://oxidize-rb.github.io/rb-sys"

# Directory where the book is built.
BOOK_DIR = "book"

# Directory holding the generated HTML files.
OUTPUT_DIR = os.path.join(BOOK_DIR, "html")

# Location the sitemap.xml file is written to.
SITEMAP_PATH = os.path.join(OUTPUT_DIR, "sitemap.xml")

# Value emitted in every <changefreq> element.
CHANGE_FREQ = "weekly"

# Value emitted in every <priority> element (kept as a string for the XML).
PRIORITY = "0.8"
| 20 | + |
def generate_sitemap():
    """Generate a sitemap.xml file for the mdBook site.

    Walks ``OUTPUT_DIR`` for ``.html`` files (skipping ``404.html`` and
    ``print.html``), adds one ``<url>`` entry per page rooted at ``SITE_URL``
    and writes the pretty-printed document to ``SITEMAP_PATH``.

    Every entry carries today's date as ``<lastmod>`` together with the
    module-level ``CHANGE_FREQ`` and ``PRIORITY`` values. Entries are emitted
    in sorted path order so repeated runs over the same build produce the
    same file.
    """
    # Local import: keeps this function independent of the file-level imports.
    from urllib.parse import quote

    # Pages that exist in the build output but should not be indexed.
    skipped = {"404.html", "print.html"}

    # Create the root element
    urlset = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

    # Current date in the YYYY-MM-DD form used for <lastmod>
    today = datetime.date.today().isoformat()

    # Walk through the output directory
    for root, dirs, files in os.walk(OUTPUT_DIR):
        # Sorting in place steers os.walk, making traversal order deterministic.
        dirs.sort()
        for name in sorted(files):
            if not name.endswith(".html") or name in skipped:
                continue

            # Path relative to the output directory, with "/" separators.
            # as_posix() converts separators only where the platform uses
            # "\\", so a literal backslash in a POSIX file name is preserved.
            rel_path = Path(
                os.path.relpath(os.path.join(root, name), OUTPUT_DIR)
            ).as_posix()

            # The top-level index page is published at the site root itself.
            if rel_path == "index.html":
                url = SITE_URL
            else:
                # Percent-encode the path; the sitemap protocol requires
                # URL-escaped locations ("/" is left untouched by quote()).
                url = f"{SITE_URL}/{quote(rel_path)}"

            # Create the URL element
            url_element = ET.SubElement(urlset, "url")
            ET.SubElement(url_element, "loc").text = url
            ET.SubElement(url_element, "lastmod").text = today
            ET.SubElement(url_element, "changefreq").text = CHANGE_FREQ
            ET.SubElement(url_element, "priority").text = PRIORITY

    # Pretty print the XML
    xmlstr = xml.dom.minidom.parseString(ET.tostring(urlset)).toprettyxml(indent="  ")

    # Write the XML to the sitemap.xml file
    with open(SITEMAP_PATH, "w", encoding="utf-8") as f:
        f.write(xmlstr)

    print(f"Sitemap generated at {SITEMAP_PATH}")
| 67 | + |
if __name__ == "__main__":
    # Script entry point: create the HTML output directory when it is
    # missing (a no-op if it already exists), then write the sitemap into it.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    generate_sitemap()
0 commit comments