Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
69 changes: 69 additions & 0 deletions apps/pages/templatetags/page_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Custom template tags and filters for the pages app."""

import re

from django import template
from django.utils.safestring import mark_safe
from django.utils.text import slugify

register = template.Library()

# Match h2–h4 elements; capture tag name, existing attributes, and inner HTML.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why skip H1?

Copy link
Author

@nagasrisai nagasrisai Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hugovk h1 is the page title ,there's only ever one per page and the page URL already points to it, so an anchor on it wouldn't be useful. The sections people actually want to link into are h2 and below (e.g. individual board resolutions or meeting minutes). This is also the convention most docs sites follow, including GitHub's own markdown renderer.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are some pages with multiple H1s, such as https://www.python.org/downloads/release/python-31312/.

Does this only run on pages of board meeting resolutions/minutes? Do they all at most have a single H1?

Copy link
Author

@nagasrisai nagasrisai Mar 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hugovk Good catch ,the filter is only wired up to templates/psf/default.html, not the general pages template. So the Python release pages (like the downloads page you linked) go through a different template and won't be touched by this at all.

For PSF pages specifically, the H1 is always the page title coming from the CMS page.title field, rendered directly in the template as <h1 class="page-title">{{ page.title }}</h1> — separate from page.content where the filter runs. So the content fed into add_heading_anchors shouldn't contain any H1s.

That said, happy to extend the regex to h1–h4 if you'd prefer a more defensive approach. just let me know.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you tested this PR?

# Using DOTALL so inner content can span multiple lines.
_HEADING_RE = re.compile(r"<(h[2-4])([^>]*)>(.*?)</\1>", re.IGNORECASE | re.DOTALL)


@register.filter(is_safe=True)
def add_heading_anchors(html):
"""Add ``id`` attributes and self-link anchors to h2–h4 headings.

Given the rendered HTML of a CMS page, this filter finds every ``<h2>``,
``<h3>``, and ``<h4>`` element and:

1. Derives a URL-safe ``id`` from the heading's plain text (via
:func:`django.utils.text.slugify`).
2. Appends a ``-N`` suffix when the same slug appears more than once on
the page, preventing duplicate ``id`` values.
3. Injects a pilcrow (¶) anchor *inside* the heading so visitors can
copy a direct link to any section.

The filter is safe to apply to any page — headings that already carry an
``id`` attribute are left untouched, and headings whose text produces an
empty slug are also skipped.

Usage in a template::

{% load page_tags %}
{{ page.content|add_heading_anchors }}
"""
seen_slugs: dict[str, int] = {}

def _replace(match: re.Match) -> str:
tag = match.group(1).lower()
attrs = match.group(2)
inner = match.group(3)

# Skip headings that already have an id attribute.
if re.search(r'\bid\s*=', attrs, re.IGNORECASE):
return match.group(0)

# Derive a slug from the plain text (strip any nested HTML tags).
plain_text = re.sub(r"<[^>]+>", "", inner).strip()
base_slug = slugify(plain_text)

if not base_slug:
return match.group(0)

# Deduplicate: first occurrence keeps the bare slug; subsequent
# occurrences become slug-2, slug-3, …
count = seen_slugs.get(base_slug, 0) + 1
seen_slugs[base_slug] = count
anchor_id = base_slug if count == 1 else f"{base_slug}-{count}"

link = (
f'<a class="headerlink" href="#{anchor_id}" '
f'title="Link to this section" aria-label="Link to this section">¶</a>'
)
return f'<{tag} id="{anchor_id}"{attrs}>{inner} {link}</{tag}>'

return mark_safe(_HEADING_RE.sub(_replace, str(html)))
77 changes: 77 additions & 0 deletions apps/pages/tests/test_templatetags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Tests for apps/pages/templatetags/page_tags.py."""

from django.test import SimpleTestCase

from apps.pages.templatetags.page_tags import add_heading_anchors


class AddHeadingAnchorsFilterTests(SimpleTestCase):
"""Tests for the ``add_heading_anchors`` template filter."""

def test_h2_gets_id_and_anchor_link(self):
"""An h2 heading receives an id attribute and a pilcrow anchor link."""
html = "<h2>2023</h2>"
result = add_heading_anchors(html)
self.assertIn('id="2023"', result)
self.assertIn('href="#2023"', result)
self.assertIn("¶", result)

def test_h3_and_h4_also_processed(self):
"""h3 and h4 headings are also processed."""
for tag in ("h3", "h4"):
html = f"<{tag}>Section Title</{tag}>"
result = add_heading_anchors(html)
self.assertIn('id="section-title"', result)
self.assertIn('href="#section-title"', result)

def test_h1_and_h5_are_not_changed(self):
"""h1 and h5 headings are left untouched."""
for tag in ("h1", "h5"):
html = f"<{tag}>Title</{tag}>"
result = add_heading_anchors(html)
self.assertNotIn("id=", result)
self.assertNotIn("href=", result)

def test_duplicate_headings_get_unique_ids(self):
"""Duplicate heading text produces unique, numbered ids."""
html = "<h2>Board Resolution</h2><h2>Board Resolution</h2>"
result = add_heading_anchors(html)
self.assertIn('id="board-resolution"', result)
self.assertIn('id="board-resolution-2"', result)

def test_heading_with_existing_id_is_unchanged(self):
"""A heading that already has an id attribute is left as-is."""
html = '<h2 id="custom-id">My Section</h2>'
result = add_heading_anchors(html)
self.assertIn('id="custom-id"', result)
# No extra anchor link should be injected.
self.assertNotIn("headerlink", result)

def test_heading_with_nested_html_tags(self):
"""Plain text is extracted from headings that contain nested tags."""
html = "<h2><em>Nested</em> Heading</h2>"
result = add_heading_anchors(html)
self.assertIn('id="nested-heading"', result)

def test_non_heading_html_is_unchanged(self):
"""Non-heading elements are passed through unmodified."""
html = "<p>Some paragraph</p><ul><li>Item</li></ul>"
result = add_heading_anchors(html)
self.assertEqual(str(result), html)

def test_empty_string_returns_empty_string(self):
"""Passing an empty string returns an empty string."""
self.assertEqual(str(add_heading_anchors("")), "")

def test_heading_with_empty_text_is_unchanged(self):
"""A heading whose text slugifies to an empty string is left alone."""
html = "<h2> </h2>"
result = add_heading_anchors(html)
self.assertNotIn("id=", result)

def test_anchor_link_is_inside_heading(self):
"""The pilcrow anchor link appears inside the heading element."""
html = "<h2>Resolutions 2022</h2>"
result = str(add_heading_anchors(html))
# The closing </h2> must come after the anchor link.
self.assertIn("¶</a></h2>", result)
4 changes: 3 additions & 1 deletion templates/psf/default.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
{% extends "base.html" %}
{% load boxes %}
{% load banners %}
{% load page_tags %}

{# TODO: Try to deduplicate this and templates/pages/default.html. #}
{% block page_title %}{{ page.title }} | Python Software Foundation{% endblock %}
Expand Down Expand Up @@ -52,7 +53,7 @@
<h1 class="page-title">{{ page.title }}</h1>
</header>

{{ page.content }}
{{ page.content|add_heading_anchors }}

</article>
{% endblock content %}
Expand All @@ -71,3 +72,4 @@ <h1 class="page-title">{{ page.title }}</h1>
</aside>

{% endblock left_sidebar %}