From 65c8288b99dd93602f771417b86c0455a699a7b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefanos=20Carlstr=C3=B6m?= Date: Wed, 5 Nov 2025 10:48:30 +0100 Subject: [PATCH] Support XID_Start/XID_Continue Unicode chars in C++ identifiers (#14026) --- AUTHORS.rst | 1 + CHANGES.rst | 4 ++++ pyproject.toml | 1 + sphinx/util/cfamily.py | 8 ++++++-- tests/test_domains/test_domain_cpp.py | 4 ++++ 5 files changed, 16 insertions(+), 2 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 707c77aec04..e37a9f01d88 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -105,6 +105,7 @@ Contributors * Slawek Figiel -- additional warning suppression * Stefan Seefeld -- toctree improvements * Stefan van der Walt -- autosummary extension +* Stefanos Carlström -- minor bug fix * Steve Piercy -- documentation improvements * Szymon Karpinski -- intersphinx improvements * \T. Powers -- HTML output improvements diff --git a/CHANGES.rst b/CHANGES.rst index f0f94fda396..4ee66597dd8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,10 @@ Release 8.3.0 (in development) Dependencies ------------ +* #14026: Added `regex >= 2025.7.29`_, to support parsing for Unicode classes. + + .. _regex >= 2025.7.29: https://github.com/mrabarnett/mrab-regex + * #13786: Support `Docutils 0.22`_. Patch by Adam Turner. .. 
_Docutils 0.22: https://docutils.sourceforge.io/RELEASE-NOTES.html#release-0-22-2025-07-29 diff --git a/pyproject.toml b/pyproject.toml index e3418ab98a2..88b3c479f57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,7 @@ dependencies = [ "packaging>=23.0", "colorama>=0.4.6; sys_platform == 'win32'", "ipython>=9.6.0", + "regex>=2025.7.29" ] dynamic = ["version"] diff --git a/sphinx/util/cfamily.py b/sphinx/util/cfamily.py index 6071d90cf74..385ef9f7b02 100644 --- a/sphinx/util/cfamily.py +++ b/sphinx/util/cfamily.py @@ -2,7 +2,7 @@ from __future__ import annotations -import re +import regex as re from copy import deepcopy from typing import TYPE_CHECKING @@ -30,9 +30,13 @@ ( # This 'extends' _anon_identifier_re with the ordinary identifiers, # make sure they are in sync. (~?\b[a-zA-Z_]) # ordinary identifiers + | \p{XID_Start} # Unicode-allowed starting characters for identifiers | (@[a-zA-Z0-9_]) # our extension for names of anonymous entities ) - [a-zA-Z0-9_]*\b + ( + [a-zA-Z0-9_] # ordinary identifiers + | \p{XID_Continue} # Unicode-allowed continuing characters for identifiers + )*\b """, flags=re.VERBOSE, ) diff --git a/tests/test_domains/test_domain_cpp.py b/tests/test_domains/test_domain_cpp.py index 2e88625a9fe..4c3172e9703 100644 --- a/tests/test_domains/test_domain_cpp.py +++ b/tests/test_domains/test_domain_cpp.py @@ -860,6 +860,10 @@ def test_domain_cpp_ast_function_definitions() -> None: check('function', 'decltype(auto) f()', {1: 'f', 2: '1fv'}) + # Test derived from https://github.com/sphinx-doc/sphinx/issues/14026 + # Unicode identifiers + check('function', 'void f(int *const ξ)', {1: 'f__iPC', 2: '1fPCi'}) + # TODO: make tests for functions in a template, e.g., Test # such that the id generation for function type types is correct.