Skip to content
148 changes: 135 additions & 13 deletions mkdocs_redirects/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,51 @@
Copyright 2019-2022 DataRobot, Inc. and its affiliates.
All rights reserved.
"""
import logging
from __future__ import annotations

import os
import posixpath
from typing import TypedDict

from mkdocs import utils
from mkdocs.config import config_options
from mkdocs.plugins import BasePlugin
from mkdocs.plugins import BasePlugin, get_plugin_logger
from mkdocs.structure.files import File

log = logging.getLogger("mkdocs.plugin.redirects")
log = get_plugin_logger(__name__)


def gen_anchor_redirects(anchor_list: list[tuple[str, str]]) -> str:
"""
Generate the JavaScript snippet that redirects old anchors to their new links.

Each ``(old_anchor, new_link)`` pair becomes one ``if (window.location.hash === ...)``
statement that assigns ``location.href``. Both values are interpolated verbatim into
double-quoted JavaScript string literals; no escaping is applied here.

:param anchor_list: A list of tuples containing old anchors and new links.
:return: A string of JavaScript redirects for the anchors (empty string for an empty list).
"""
js_redirects = ""
for old_anchor, new_link in anchor_list:
# Create a JavaScript redirect for each anchor
js_redirects += f"""
if (window.location.hash === "{old_anchor}") {{
location.href = "{new_link}";
Comment on lines +31 to +32
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought: I wonder if we shouldn't escape the variables properly. Something like ... = {old_anchor!r}, which I believe would work in Javascript code as well?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain a scenario where this would be useful?

Copy link
Contributor

@pawamoy pawamoy Jun 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can anchors contain "? Or would such characters be encoded in the percent format? If yes and not encoded, such anchors would break this Javascript code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding is that pretty much all headings in mkdocs go through a slugification process that removes special characters (mkdocs-material docs on this). I have personally never seen a link with this type of character in it, but I can add the escaping if you would like.

}}
"""
return js_redirects


# This template is used to generate the HTML file for the redirect.
HTML_TEMPLATE = """
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Redirecting...</title>
<link rel="canonical" href="{url}">
<script>var anchor=window.location.hash.substr(1);location.href="{url}"+(anchor?"#"+anchor:"")</script>
<script>
var anchor = window.location.hash.substr(1);
location.href = `{url}${{anchor ? '#' + anchor : ''}}`;
{redirects}
</script>
<meta http-equiv="refresh" content="0; url={url}">
</head>
<body>
Expand All @@ -30,8 +55,14 @@
</html>
"""

# Wrapper for anchor-redirect JavaScript that is prepended to the HTML of a page
# that still exists; the `{redirects}` placeholder is filled in via str.format().
JS_INJECT_EXISTS = """
<script>
{redirects}
</script>
"""


def write_html(site_dir, old_path, new_path):
def write_html(site_dir: str, old_path: str, new_path: str, anchor_list: list[tuple[str, str]]) -> None:
"""Write an HTML file in the site_dir with a meta redirect to the new page"""
# Determine all relevant paths
old_path_abs = os.path.join(site_dir, old_path)
Expand All @@ -45,7 +76,8 @@ def write_html(site_dir, old_path, new_path):

# Write the HTML redirect file in place of the old file
log.debug("Creating redirect: '%s' -> '%s'", old_path, new_path)
content = HTML_TEMPLATE.format(url=new_path)
redirects = gen_anchor_redirects(anchor_list) # Example anchor map
content = HTML_TEMPLATE.format(url=new_path, redirects=redirects)
with open(old_path_abs, "w", encoding="utf-8") as f:
f.write(content)

Expand All @@ -68,55 +100,145 @@ def get_html_path(path, use_directory_urls):
return f.dest_path.replace(os.sep, "/")


class RedirectEntry(TypedDict):
# Redirect record for one old page, as built by build_redirect_entries().
# (old_hash, new_target) pairs, one per hash-specific redirect of the page.
hashes: list[tuple[str, str]]
# Overall redirect target for the page as a whole.
overall: str


def build_redirect_entries(redirects: dict) -> dict[str, RedirectEntry]:
    """
    Build a per-page lookup table from the original old->new page mappings.

    Keys of the result are old pages with any ``#hash`` fragment removed. Each
    entry holds an ``overall`` target for the page as a whole and ``hashes``,
    a list of ``(old_hash, new_target)`` pairs for hash-specific redirects.

    A page that only has hash-specific redirects uses the target of its first
    hash redirect as ``overall``. If a page has neither a non-empty overall
    target nor any hash redirect (e.g. it was mapped to an empty string),
    ``overall`` is left empty instead of raising ``IndexError``.

    :param redirects: Mapping of old page (optionally with ``#hash``) to new target.
    :return: Mapping of hash-less old page to its redirect entry.
    """
    redirect_entries: dict[str, RedirectEntry] = {}
    for page_old, page_new in redirects.items():
        page_old_without_hash, old_hash = _split_hash_fragment(str(page_old))
        entry = redirect_entries.setdefault(
            page_old_without_hash, {"hashes": [], "overall": ""}
        )
        if old_hash == "":
            entry["overall"] = page_new
        else:
            entry["hashes"].append((old_hash, page_new))

    # Pages without an explicit overall redirect fall back to their first hash
    # redirect. The ``hashes`` check guards the case where the only mapping for
    # a page had an empty target, which would otherwise index an empty list.
    for entry in redirect_entries.values():
        if entry.get("overall", "") == "" and entry["hashes"]:
            entry["overall"] = entry["hashes"][0][1]

    return redirect_entries


class RedirectPlugin(BasePlugin):
    """MkDocs plugin that creates page-level and anchor-level redirects from ``redirect_maps``."""

    # Any options that this plugin supplies should go here.
    config_scheme = (
        ("redirect_maps", config_options.Type(dict, default={})),  # note the trailing comma
    )

    # Per-old-page lookup table; (re)built in on_files().
    redirect_entries: dict[str, RedirectEntry]

    def _resolve_hash_redirects(self, page_old, hash_redirects, use_directory_urls):
        """
        Return ``hash_redirects`` with targets that are known pages made relative to ``page_old``.

        Targets whose page part is not in ``self.doc_pages`` are passed through unchanged.
        A new list is returned so the shared redirect entry is never modified in place.
        """
        resolved = []
        for old_hash, new_link in hash_redirects:
            target_page, new_hash = _split_hash_fragment(str(new_link))
            if target_page in self.doc_pages:
                file = self.doc_pages[target_page]
                new_link = get_relative_html_path(
                    page_old, file.url + new_hash, use_directory_urls
                )
            resolved.append((old_hash, new_link))
        return resolved

    # Build a list of redirects on file generation
    def on_files(self, files, config, **kwargs):
        """Validate the configured redirects and index the known documentation pages."""
        self.redirects = self.config.get("redirect_maps", {})

        # Validate user-provided redirect "old files". The hash fragment is stripped
        # first so that "page.md#anchor" keys are checked by their page part only.
        for page_old in self.redirects:
            page_old_without_hash, _ = _split_hash_fragment(str(page_old))
            if not utils.is_markdown_file(page_old_without_hash):
                log.warning(
                    "redirects plugin: '%s' is not a valid markdown file!", page_old_without_hash
                )

        # Build a dict of known document pages to validate against later
        self.doc_pages = {
            page.src_path.replace(os.sep, "/"): page  # object type: mkdocs.structure.files.File
            for page in files.documentation_pages()
        }

        # Create a dictionary to hold anchor maps for redirects
        self.redirect_entries = build_redirect_entries(self.redirects)

    def on_page_content(self, html, page, config, files):
        """
        Prepend anchor-redirect JavaScript to a page that has hash-specific redirects.

        The HTML is returned unchanged when the page has no redirect entry or the
        entry has no hash-specific redirects (so no empty ``<script>`` is injected).
        """
        use_directory_urls = config.get("use_directory_urls")
        page_old = page.file.src_uri
        entry = self.redirect_entries.get(page_old)
        if entry is None or not entry["hashes"]:
            return html

        hash_redirects = self._resolve_hash_redirects(
            page_old, entry["hashes"], use_directory_urls
        )
        for old_hash, new_link in hash_redirects:
            log.info("Injecting redirect for '%s%s' to '%s'", page_old, old_hash, new_link)

        js_redirects = JS_INJECT_EXISTS.format(redirects=gen_anchor_redirects(hash_redirects))
        return js_redirects + html

    # Create HTML files for redirects after site dir has been built
    def on_post_build(self, config, **kwargs):
        """Write a redirect HTML file for every old page that is not an existing document."""
        # Determine if 'use_directory_urls' is set
        use_directory_urls = config.get("use_directory_urls")

        # Walk through the redirect entries and write their HTML files
        for page_old, redirect_entry in self.redirect_entries.items():
            page_old_without_hash, _ = _split_hash_fragment(str(page_old))
            # If the old page is a valid document page, it was injected in `on_page_content`.
            if page_old_without_hash in self.doc_pages:
                continue

            # Need to remove hash fragment from new page to verify existence
            page_new = str(redirect_entry["overall"])
            page_new_without_hash, new_hash = _split_hash_fragment(page_new)

            # External redirect targets are easy, just use it as the target path
            if page_new.lower().startswith(("http://", "https://")):
                dest_path = page_new

            # If the redirect target is a valid internal page, we need to create a relative path
            elif page_new_without_hash in self.doc_pages:
                file = self.doc_pages[page_new_without_hash]
                dest_path = get_relative_html_path(
                    page_old, file.url + new_hash, use_directory_urls
                )

            # If the redirect target isn't external or a valid internal page, throw an error
            # Note: we use 'warn' here specifically; mkdocs treats warnings specially when in strict mode
            else:
                log.warning("Redirect target '%s' does not exist!", page_new)
                continue

            # Fixup all the individual hash link references to be relative.
            hash_redirects = self._resolve_hash_redirects(
                page_old, redirect_entry["hashes"], use_directory_urls
            )

            log.info("Creating redirect for '%s' to '%s'", page_old, dest_path)
            for old_hash, new_link in hash_redirects:
                log.info("Creating redirect for '%s%s' to '%s'", page_old, old_hash, new_link)

            # Create a new HTML file for the redirect. `dest_path` is already final here
            # (an external URL, or a path made relative above), so it must not be passed
            # through get_relative_html_path() a second time.
            write_html(
                config["site_dir"],
                get_html_path(page_old, use_directory_urls),
                dest_path,
                hash_redirects,
            )


Expand Down
9 changes: 7 additions & 2 deletions tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pytest
from mkdocs.structure.files import File
from mkdocs.structure.pages import Page

from mkdocs_redirects import plugin

Expand All @@ -27,21 +28,25 @@
def run_redirect_test(monkeypatch, old_page, new_page, use_directory_urls):
    """
    Run the plugin hooks for a single redirect and report what would be written.

    ``plugin.write_html`` is replaced so nothing touches the filesystem.

    :return: ``(old_path, new_path)`` as passed to ``write_html``, or an empty
        tuple when no redirect file was written.
    """
    wrote = ()

    def write_html(site_dir, old_path, new_path, anchor_list):
        nonlocal wrote
        wrote = (old_path, new_path)

    monkeypatch.setattr(plugin, "write_html", write_html)

    plg = plugin.RedirectPlugin()
    plg.redirects = {old_page: new_page}
    plg.redirect_entries = plugin.build_redirect_entries(plg.redirects)
    plg.doc_pages = {
        path: File(path, "docs", "site", use_directory_urls) for path in existing_pages
    }
    fake = plg.doc_pages["the/fake.md"]
    fake.dest_path = "fake/destination/index.html"
    fake.url = fake._get_url(use_directory_urls)

    config = dict(use_directory_urls=use_directory_urls, site_dir="site")
    # Existing pages get their anchor redirects injected first; an empty string
    # stands in for the rendered HTML so the injected script can be prepended to it.
    for entry in plg.doc_pages.values():
        plg.on_page_content("", Page(None, entry, config), config, None)
    # Run the post-build hook exactly once, with the same config.
    plg.on_post_build(config)

    return wrote

Expand Down
Loading