Skip to content

Commit f7a1397

Browse files
authored
[console] enhance detection and elimination of known ANSI escape sequences (sphinx-doc#12216)
This PR improves the logic for detecting and eliminating ANSI color codes and other escape sequences introduced by Sphinx. ANSI escape sequences that are not natively known to Sphinx are not eliminated (e.g., VT100-specific functions).
1 parent df3cde6 commit f7a1397

File tree

4 files changed

+142
-18
lines changed

4 files changed

+142
-18
lines changed

sphinx/util/_io.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from typing import TYPE_CHECKING
44

5-
from sphinx.util.console import _strip_escape_sequences
5+
from sphinx.util.console import strip_escape_sequences
66

77
if TYPE_CHECKING:
88
from typing import Protocol
@@ -25,7 +25,7 @@ def __init__(
2525

2626
def write(self, text: str, /) -> None:
2727
self.stream_term.write(text)
28-
self.stream_file.write(_strip_escape_sequences(text))
28+
self.stream_file.write(strip_escape_sequences(text))
2929

3030
def flush(self) -> None:
3131
if hasattr(self.stream_term, 'flush'):

sphinx/util/console.py

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,19 +44,25 @@ def turquoise(text: str) -> str: ... # NoQA: E704
4444
except ImportError:
4545
colorama = None
4646

47+
_CSI: Final[str] = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer
4748

48-
_CSI = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer
49-
_ansi_re: re.Pattern[str] = re.compile(
49+
# Pattern matching ANSI control sequences containing colors.
50+
_ansi_color_re: Final[re.Pattern[str]] = re.compile(r'\x1b\[(?:\d+;){0,2}\d*m')
51+
52+
_ansi_re: Final[re.Pattern[str]] = re.compile(
5053
_CSI
5154
+ r"""
52-
(
53-
(\d\d;){0,2}\d\dm # ANSI colour code
55+
(?:
56+
(?:\d+;){0,2}\d*m # ANSI color code ('m' is equivalent to '0m')
5457
|
55-
\dK # ANSI Erase in Line
58+
[012]?K # ANSI Erase in Line ('K' is equivalent to '0K')
5659
)""",
5760
re.VERBOSE | re.ASCII,
5861
)
59-
_ansi_color_re: Final[re.Pattern[str]] = re.compile('\x1b.*?m')
62+
"""Pattern matching ANSI CSI colors (SGR) and erase line (EL) sequences.
63+
64+
See :func:`strip_escape_sequences` for details.
65+
"""
6066

6167
codes: dict[str, str] = {}
6268

@@ -80,7 +86,7 @@ def term_width_line(text: str) -> str:
8086
return text + '\n'
8187
else:
8288
# codes are not displayed, this must be taken into account
83-
return text.ljust(_tw + len(text) - len(_ansi_re.sub('', text))) + '\r'
89+
return text.ljust(_tw + len(text) - len(strip_escape_sequences(text))) + '\r'
8490

8591

8692
def color_terminal() -> bool:
@@ -128,11 +134,39 @@ def escseq(name: str) -> str:
128134

129135

130136
def strip_colors(s: str) -> str:
137+
"""Remove the ANSI color codes in a string *s*.
138+
139+
.. caution::
140+
141+
This function is not meant to be used in production and should only
142+
be used for testing Sphinx's output messages.
143+
144+
.. seealso:: :func:`strip_escape_sequences`
145+
"""
131146
return _ansi_color_re.sub('', s)
132147

133148

134-
def _strip_escape_sequences(s: str) -> str:
135-
return _ansi_re.sub('', s)
149+
def strip_escape_sequences(text: str, /) -> str:
150+
r"""Remove the ANSI CSI colors and "erase in line" sequences.
151+
152+
Other `escape sequences`__ (e.g., VT100-specific functions) are not
153+
supported and only control sequences *natively* known to Sphinx (i.e.,
154+
colors declared in this module and "erase entire line" (``'\x1b[2K'``))
155+
are eliminated by this function.
156+
157+
.. caution::
158+
159+
This function is not meant to be used in production and should only
160+
be used for testing Sphinx's output messages that were not tampered
161+
with by third-party extensions.
162+
163+
.. versionadded:: 7.3
164+
165+
This function is added as an *experimental* feature.
166+
167+
__ https://en.wikipedia.org/wiki/ANSI_escape_code
168+
"""
169+
return _ansi_re.sub('', text)
136170

137171

138172
def create_color_func(name: str) -> None:
@@ -151,8 +185,8 @@ def inner(text: str) -> str:
151185
'blink': '05m',
152186
}
153187

154-
for _name, _value in _attrs.items():
155-
codes[_name] = '\x1b[' + _value
188+
for __name, __value in _attrs.items():
189+
codes[__name] = '\x1b[' + __value
156190

157191
_colors = [
158192
('black', 'darkgray'),
@@ -165,9 +199,9 @@ def inner(text: str) -> str:
165199
('lightgray', 'white'),
166200
]
167201

168-
for i, (dark, light) in enumerate(_colors, 30):
169-
codes[dark] = '\x1b[%im' % i
170-
codes[light] = '\x1b[%im' % (i + 60)
202+
for __i, (__dark, __light) in enumerate(_colors, 30):
203+
codes[__dark] = '\x1b[%im' % __i
204+
codes[__light] = '\x1b[%im' % (__i + 60)
171205

172206
_orig_codes = codes.copy()
173207

sphinx/util/exceptions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import TYPE_CHECKING
77

88
from sphinx.errors import SphinxParallelError
9-
from sphinx.util.console import _strip_escape_sequences
9+
from sphinx.util.console import strip_escape_sequences
1010

1111
if TYPE_CHECKING:
1212
from sphinx.application import Sphinx
@@ -31,7 +31,7 @@ def save_traceback(app: Sphinx | None, exc: BaseException) -> str:
3131
last_msgs = exts_list = ''
3232
else:
3333
extensions = app.extensions.values()
34-
last_msgs = '\n'.join(f'# {_strip_escape_sequences(s).strip()}'
34+
last_msgs = '\n'.join(f'# {strip_escape_sequences(s).strip()}'
3535
for s in app.messagelog)
3636
exts_list = '\n'.join(f'# {ext.name} ({ext.version})' for ext in extensions
3737
if ext.version != 'builtin')
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from __future__ import annotations
2+
3+
import itertools
4+
import operator
5+
from typing import TYPE_CHECKING
6+
7+
import pytest
8+
9+
from sphinx.util.console import blue, reset, strip_colors, strip_escape_sequences
10+
11+
if TYPE_CHECKING:
12+
from collections.abc import Callable, Sequence
13+
from typing import Final, TypeVar
14+
15+
_T = TypeVar('_T')
16+
17+
CURSOR_UP: Final[str] = '\x1b[2A' # ignored ANSI code
18+
ERASE_LINE: Final[str] = '\x1b[2K' # supported ANSI code
19+
TEXT: Final[str] = '\x07 Hello world!'
20+
21+
22+
@pytest.mark.parametrize(
23+
('strip_function', 'ansi_base_blocks', 'text_base_blocks'),
24+
[
25+
(
26+
strip_colors,
27+
# double ERASE_LINE so that the tested strings may have 2 of them
28+
[TEXT, blue(TEXT), reset(TEXT), ERASE_LINE, ERASE_LINE, CURSOR_UP],
29+
# :func:`strip_colors` removes color codes but keeps ERASE_LINE and CURSOR_UP
30+
[TEXT, TEXT, TEXT, ERASE_LINE, ERASE_LINE, CURSOR_UP],
31+
),
32+
(
33+
strip_escape_sequences,
34+
# double ERASE_LINE so that the tested strings may have 2 of them
35+
[TEXT, blue(TEXT), reset(TEXT), ERASE_LINE, ERASE_LINE, CURSOR_UP],
36+
# :func:`strip_escape_sequences` strips ANSI codes known by Sphinx
37+
[TEXT, TEXT, TEXT, '', '', CURSOR_UP],
38+
),
39+
],
40+
ids=[strip_colors.__name__, strip_escape_sequences.__name__],
41+
)
42+
def test_strip_ansi(
43+
strip_function: Callable[[str], str],
44+
ansi_base_blocks: Sequence[str],
45+
text_base_blocks: Sequence[str],
46+
) -> None:
47+
assert callable(strip_function)
48+
assert len(text_base_blocks) == len(ansi_base_blocks)
49+
N = len(ansi_base_blocks)
50+
51+
def next_ansi_blocks(choices: Sequence[str], n: int) -> Sequence[str]:
52+
# Get a list of *n* words from a cyclic sequence of *choices*.
53+
#
54+
# For instance ``next_ansi_blocks(['a', 'b'], 3) == ['a', 'b', 'a']``.
55+
stream = itertools.cycle(choices)
56+
return list(map(operator.itemgetter(0), zip(stream, range(n))))
57+
58+
# generate all permutations of length N
59+
for sigma in itertools.permutations(range(N), N):
60+
# apply the permutation on the blocks with ANSI codes
61+
ansi_blocks = list(map(ansi_base_blocks.__getitem__, sigma))
62+
# apply the permutation on the blocks with stripped codes
63+
text_blocks = list(map(text_base_blocks.__getitem__, sigma))
64+
65+
for glue, n in itertools.product(['.', '\n', '\r\n'], range(4 * N)):
66+
ansi_strings = next_ansi_blocks(ansi_blocks, n)
67+
text_strings = next_ansi_blocks(text_blocks, n)
68+
assert len(ansi_strings) == len(text_strings) == n
69+
70+
ansi_string = glue.join(ansi_strings)
71+
text_string = glue.join(text_strings)
72+
assert strip_function(ansi_string) == text_string
73+
74+
75+
def test_strip_ansi_short_forms():
76+
# In Sphinx, we always "normalize" the color codes so that they
77+
# match "\x1b\[(\d\d;){0,2}(\d\d)m" but it might happen that
78+
# some messages use '\x1b[0m' instead of ``reset(s)``, so we
79+
# test whether this alternative form is supported or not.
80+
81+
for strip_function in [strip_colors, strip_escape_sequences]:
82+
# \x1b[m and \x1b[0m are equivalent to \x1b[00m
83+
assert strip_function('\x1b[m') == ''
84+
assert strip_function('\x1b[0m') == ''
85+
86+
# \x1b[1m is equivalent to \x1b[01m
87+
assert strip_function('\x1b[1mbold\x1b[0m') == 'bold'
88+
89+
# \x1b[K is equivalent to \x1b[0K
90+
assert strip_escape_sequences('\x1b[K') == ''

0 commit comments

Comments
 (0)