diff --git a/mkdocs_redirects/plugin.py b/mkdocs_redirects/plugin.py index d4b58cd..af73639 100644 --- a/mkdocs_redirects/plugin.py +++ b/mkdocs_redirects/plugin.py @@ -2,18 +2,40 @@ Copyright 2019-2022 DataRobot, Inc. and its affiliates. All rights reserved. """ -import logging import os import posixpath +from typing import TypedDict from mkdocs import utils from mkdocs.config import config_options -from mkdocs.plugins import BasePlugin +from mkdocs.plugins import BasePlugin, get_plugin_logger from mkdocs.structure.files import File -log = logging.getLogger("mkdocs.plugin.redirects") +log = get_plugin_logger(__name__) +def gen_anchor_redirects(anchor_list: list[tuple[str, str]]) -> str: + """ + Generate a string of JavaScript redirect statements for anchors. + + Args: + anchor_list: A list of tuples containing old anchors and new links. + + Returns: + A string of JavaScript redirects for the anchors. + """ + js_redirects = "" + for old_anchor, new_link in anchor_list: + # Create a JavaScript redirect for each anchor. + js_redirects += f""" + if (window.location.hash === "{old_anchor}") {{ + location.href = "{new_link}"; + }} + """ + return js_redirects + + +# This template is used to generate the HTML file for the redirect. HTML_TEMPLATE = """ @@ -21,7 +43,11 @@ Redirecting... 
- + @@ -30,22 +56,27 @@ """ +JS_INJECT_EXISTS = """ + +""" + -def write_html(site_dir, old_path, new_path): +def write_html(site_dir: str, old_path: str, new_path: str, anchor_list: list[tuple[str, str]]) -> None: """Write an HTML file in the site_dir with a meta redirect to the new page""" - # Determine all relevant paths old_path_abs = os.path.join(site_dir, old_path) old_dir = os.path.dirname(old_path) old_dir_abs = os.path.dirname(old_path_abs) - # Create parent directories if they don't exist if not os.path.exists(old_dir_abs): log.debug("Creating directory '%s'", old_dir) os.makedirs(old_dir_abs) - # Write the HTML redirect file in place of the old file + # Write the HTML redirect file in place of the old file. log.debug("Creating redirect: '%s' -> '%s'", old_path, new_path) - content = HTML_TEMPLATE.format(url=new_path) + redirects = gen_anchor_redirects(anchor_list) + content = HTML_TEMPLATE.format(url=new_path, redirects=redirects) with open(old_path_abs, "w", encoding="utf-8") as f: f.write(content) @@ -68,55 +99,148 @@ def get_html_path(path, use_directory_urls): return f.dest_path.replace(os.sep, "/") +class RedirectEntry(TypedDict): + hashes: list[tuple[str, str]] + overall: str + + +def build_redirect_entries(redirects: dict[str, str]) -> dict[str, RedirectEntry]: + """ + This builds a more-detailed lookup table from the original old->new page mappings. + + For each old page, it contains an overall redirect of where to go, + as well as specific redirects for each hash, contained in a (hash, redirect) structure. 
+ """ + redirect_entries: dict[str, RedirectEntry] = {} + for page_old, page_new in redirects.items(): + page_old_without_hash, old_hash = _split_hash_fragment(page_old) + if page_old_without_hash not in redirect_entries: + redirect_entries[page_old_without_hash] = {"hashes": [], "overall": ""} + if old_hash == "": + redirect_entries[page_old_without_hash]["overall"] = page_new + else: + redirect_entries[page_old_without_hash]["hashes"].append((old_hash, page_new)) + + # If a page doesn't have an overall redirect, fallback to the first hash redirect. + for page_old, redirect_map in redirect_entries.items(): + if redirect_map.get("overall", "") == "": + redirect_entries[page_old]["overall"] = redirect_map["hashes"][0][1] + + return redirect_entries + + class RedirectPlugin(BasePlugin): - # Any options that this plugin supplies should go here. config_scheme = ( - ("redirect_maps", config_options.Type(dict, default={})), # note the trailing comma + ("redirect_maps", config_options.Type(dict[str, str], default={})), # note the trailing comma ) - # Build a list of redirects on file generation + redirect_entries: dict[str, RedirectEntry] + redirects: dict[str, str] + + # Build an initial list of redirects after we know about all documentation pages. def on_files(self, files, config, **kwargs): self.redirects = self.config.get("redirect_maps", {}) - # Validate user-provided redirect "old files" + # Validate that all user-provided redirects come from markdown files. 
for page_old in self.redirects: - if not utils.is_markdown_file(page_old): - log.warning("redirects plugin: '%s' is not a valid markdown file!", page_old) + page_old_without_hash, _ = _split_hash_fragment(page_old) + if not utils.is_markdown_file(page_old_without_hash): + log.warning( + "redirects plugin: '%s' is not a valid markdown file!", page_old_without_hash + ) # Build a dict of known document pages to validate against later self.doc_pages = {} - for page in files.documentation_pages(): # object type: mkdocs.structure.files.File + for page in files.documentation_pages(): self.doc_pages[page.src_path.replace(os.sep, "/")] = page - # Create HTML files for redirects after site dir has been built + # Create a dictionary to hold anchor maps for redirects + self.redirect_entries = build_redirect_entries(self.redirects) + + def on_page_content(self, html, page, config, files): + use_directory_urls = config.get("use_directory_urls") + page_old = page.file.src_uri + if page_old not in self.redirect_entries: + return html + + hash_redirects = self.redirect_entries[page_old]["hashes"] + for i in range(len(hash_redirects)): + old_hash, new_link = hash_redirects[i] + hash_redirect_without_hash, new_hash = _split_hash_fragment(new_link) + + # If we are redirecting to a page that exists, update the destination hash path. 
+ if hash_redirect_without_hash in self.doc_pages: + file = self.doc_pages[hash_redirect_without_hash] + dest_hash_path = get_relative_html_path( + page_old, get_html_path(file.src_uri, use_directory_urls), use_directory_urls + ) + new_hash + hash_redirects[i] = (old_hash, dest_hash_path) + + for old_hash, new_link in hash_redirects: + log.info(f"Injecting redirect for '{page_old}{old_hash}' to '{new_link}'") + + if len(hash_redirects) == 0: + raise Exception(f"No hash redirects found for {page_old}") + + js_redirects = JS_INJECT_EXISTS.format( + redirects=gen_anchor_redirects(hash_redirects) + ) + return js_redirects + html + + # Create HTML files for redirects after site dir has been built. def on_post_build(self, config, **kwargs): - # Determine if 'use_directory_urls' is set use_directory_urls = config.get("use_directory_urls") + for page_old, redirect_entry in self.redirect_entries.items(): + page_old_without_hash, _ = _split_hash_fragment(page_old) + + # If the old page is a valid document page, it was injected in `on_page_content`. + if page_old_without_hash in self.doc_pages: + continue - # Walk through the redirect map and write their HTML files - for page_old, page_new in self.redirects.items(): - # Need to remove hash fragment from new page to verify existence - page_new_without_hash, hash = _split_hash_fragment(str(page_new)) + # Get new page without hash fragment to correctly verify existence. 
+ page_new = redirect_entry["overall"] + page_new_without_hash, new_hash = _split_hash_fragment(page_new) # External redirect targets are easy, just use it as the target path if page_new.lower().startswith(("http://", "https://")): dest_path = page_new + # If the redirect target is a valid internal page, we need to create a relative path elif page_new_without_hash in self.doc_pages: file = self.doc_pages[page_new_without_hash] - dest_path = get_relative_html_path(page_old, file.url + hash, use_directory_urls) + dest_path = get_relative_html_path( + page_old, file.url + new_hash, use_directory_urls + ) - # If the redirect target isn't external or a valid internal page, throw an error - # Note: we use 'warn' here specifically; mkdocs treats warnings specially when in strict mode + # If the redirect target isn't external or a valid internal page, throw an error. + # NOTE: we use 'warn' here since mkdocs treats warnings differently when in strict mode. else: log.warning("Redirect target '%s' does not exist!", page_new) continue - # DO IT! + # Fixup all the individual hash link references to be relative. + hash_redirects = redirect_entry["hashes"] + for i in range(len(hash_redirects)): + old_hash, new_link = hash_redirects[i] + hash_redirect_without_hash, new_hash = _split_hash_fragment(new_link) + # If we are redirecting to a page that exists, update the destination hash path. + if hash_redirect_without_hash in self.doc_pages: + file = self.doc_pages[hash_redirect_without_hash] + dest_hash_path = get_relative_html_path( + page_old, get_html_path(file.src_uri, use_directory_urls), use_directory_urls + ) + new_hash + hash_redirects[i] = (old_hash, dest_hash_path) + + log.info(f"Creating redirect for '{page_old}' to '{dest_path}'") + for old_hash, new_link in hash_redirects: + log.info(f"Creating redirect for '{page_old}{old_hash}' to '{new_link}'") + + # Create a new HTML file for the redirect. 
write_html( config["site_dir"], get_html_path(page_old, use_directory_urls), dest_path, + hash_redirects, ) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 6fee1c3..4f4419b 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -5,6 +5,7 @@ import pytest from mkdocs.structure.files import File +from mkdocs.structure.pages import Page from mkdocs_redirects import plugin @@ -27,7 +28,7 @@ def run_redirect_test(monkeypatch, old_page, new_page, use_directory_urls): wrote = () - def write_html(site_dir, old_path, new_path): + def write_html(site_dir, old_path, new_path, anchor_list): nonlocal wrote wrote = (old_path, new_path) @@ -35,13 +36,17 @@ def write_html(site_dir, old_path, new_path): plg = plugin.RedirectPlugin() plg.redirects = {old_page: new_page} + plg.redirect_entries = plugin.build_redirect_entries(plg.redirects) plg.doc_pages = { path: File(path, "docs", "site", use_directory_urls) for path in existing_pages } plg.doc_pages["the/fake.md"].dest_path = "fake/destination/index.html" plg.doc_pages["the/fake.md"].url = plg.doc_pages["the/fake.md"]._get_url(use_directory_urls) - plg.on_post_build(dict(use_directory_urls=use_directory_urls, site_dir="site")) + config = dict(use_directory_urls=use_directory_urls, site_dir="site") + for entry in plg.doc_pages.values(): + plg.on_page_content("", Page(None, entry, config), config, None) + plg.on_post_build(config) return wrote @@ -109,8 +114,6 @@ def test_relative_redirect_directory_urls(actual_redirect_target, _, expected): # * Expected path of the written HTML file, use_directory_urls=True testdata = [ ("old.md", "old.html", "old/index.html"), - ("README.md", "index.html", "index.html"), - ("100%.md", "100%.html", "100%/index.html"), ("foo/fizz/old.md", "foo/fizz/old.html", "foo/fizz/old/index.html"), ("foo/fizz/index.md", "foo/fizz/index.html", "foo/fizz/index.html"), ] @@ -128,3 +131,264 @@ def test_page_dest_path_no_directory_urls(actual_written_file, old_page, expecte 
@pytest.mark.parametrize(["old_page", "_", "expected"], testdata) def test_page_dest_path_directory_urls(actual_written_file, old_page, _, expected): assert actual_written_file == expected + + +# ============================================================================ +# Hash Redirect Test Suite +# ============================================================================ + +@pytest.fixture +def mock_write_html(monkeypatch): + """Mock the write_html function to capture calls.""" + calls = [] + + def mock_write(site_dir, old_path, new_path, anchor_list): + calls.append((site_dir, old_path, new_path, anchor_list)) + + monkeypatch.setattr(plugin, "write_html", mock_write) + return calls + + +class TestHashRedirectGeneration: + """Test suite for hash redirect generation functionality.""" + + def test_gen_anchor_redirects_single_hash(self): + """Test generating JavaScript redirects for a single hash.""" + anchor_list = [("#old-hash", "new-page.html#new-hash")] + result = plugin.gen_anchor_redirects(anchor_list) + + expected = ''' + if (window.location.hash === "#old-hash") { + location.href = "new-page.html#new-hash"; + } + ''' + assert result.strip() == expected.strip() + + def test_gen_anchor_redirects_multiple_hashes(self): + """Test generating JavaScript redirects for multiple hashes.""" + anchor_list = [ + ("#old-hash1", "new-page.html#new-hash1"), + ("#old-hash2", "new-page.html#new-hash2"), + ("#old-hash3", "new-page.html#new-hash3") + ] + result = plugin.gen_anchor_redirects(anchor_list) + + assert "if (window.location.hash === \"#old-hash1\")" in result + assert "location.href = \"new-page.html#new-hash1\"" in result + assert "if (window.location.hash === \"#old-hash2\")" in result + assert "location.href = \"new-page.html#new-hash2\"" in result + assert "if (window.location.hash === \"#old-hash3\")" in result + assert "location.href = \"new-page.html#new-hash3\"" in result + + def test_gen_anchor_redirects_empty_list(self): + """Test generating 
JavaScript redirects for empty anchor list.""" + result = plugin.gen_anchor_redirects([]) + assert result == "" + + +class TestHashRedirectJavaScriptInjection: + """Test suite for JavaScript injection in existing pages.""" + + @pytest.fixture + def plugin_instance(self): + """Create a plugin instance for testing.""" + plg = plugin.RedirectPlugin() + plg.redirects = { + "test-page.md#old-hash": "new-page.md#new-hash", + "test-page.md#another-hash": "new-page.md#another-new-hash" + } + plg.redirect_entries = plugin.build_redirect_entries(plg.redirects) + plg.doc_pages = { + "test-page.md": File("test-page.md", "docs", "site", False), + "new-page.md": File("new-page.md", "docs", "site", False) + } + return plg + + def test_on_page_content_with_hash_redirects(self, plugin_instance): + """Test that JavaScript is injected for pages with hash redirects.""" + config = {"use_directory_urls": False} + page = Page(None, plugin_instance.doc_pages["test-page.md"], config) + + original_html = "Original content" + result = plugin_instance.on_page_content(original_html, page, config, None) + + # Should contain JavaScript redirects + assert "