Skip to content

Commit f78fa19

Browse files
committed
Added regular expressions to detect ANSI escape and style sequences.
1 parent 378e825 commit f78fa19

File tree

3 files changed

+30
-19
lines changed

3 files changed

+30
-19
lines changed

cmd2/argparse_custom.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ def __init__(self, value: object, descriptive_data: Sequence[Any], *args: Any) -
398398
# Make sure all objects are renderable by a Rich table.
399399
renderable_data = [obj if is_renderable(obj) else str(obj) for obj in descriptive_data]
400400

401-
# Convert objects with ANSI styles to Rich Text for correct display width.
401+
# Convert objects with ANSI escape sequences to Rich Text for correct display width.
402402
self.descriptive_data = ru.prepare_objects_for_rendering(*renderable_data)
403403

404404
# Save the original value to support CompletionItems as argparse choices.

cmd2/rich_utils.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Provides common utilities to support Rich in cmd2-based applications."""
22

3+
import re
34
from collections.abc import Mapping
45
from enum import Enum
56
from typing import (
@@ -28,13 +29,18 @@
2829

2930
from .styles import DEFAULT_CMD2_STYLES
3031

32+
# A compiled regular expression to detect ANSI escape sequences.
33+
# The `[a-zA-Z]` at the end of the regex allows it to match any CSI (`ESC [`)
34+
# sequence ending in a letter (styling, cursor movement, etc.); non-CSI escapes such as OSC are not matched.
35+
_ANSI_ESCAPE_SEQUENCE_RE = re.compile(r"\x1b\[[0-9;?]*[a-zA-Z]")
36+
3137

3238
class AllowStyle(Enum):
3339
"""Values for ``cmd2.rich_utils.ALLOW_STYLE``."""
3440

35-
ALWAYS = 'Always' # Always output ANSI style sequences
36-
NEVER = 'Never' # Remove ANSI style sequences from all output
37-
TERMINAL = 'Terminal' # Remove ANSI style sequences if the output is not going to the terminal
41+
ALWAYS = "Always" # Always output ANSI style sequences
42+
NEVER = "Never" # Remove ANSI style sequences from all output
43+
TERMINAL = "Terminal" # Remove ANSI style sequences if the output is not going to the terminal
3844

3945
def __str__(self) -> str:
4046
"""Return value instead of enum name for printing in cmd2's set command."""
@@ -234,7 +240,7 @@ def rich_text_to_string(text: Text) -> str:
234240
"""Convert a Rich Text object to a string.
235241
236242
This function's purpose is to render a Rich Text object, including any styles (e.g., color, bold),
237-
to a plain Python string with ANSI escape codes. It differs from `text.plain`, which strips
243+
to a plain Python string with ANSI style sequences. It differs from `text.plain`, which strips
238244
all formatting.
239245
240246
:param text: the text object to convert
@@ -259,7 +265,7 @@ def rich_text_to_string(text: Text) -> str:
259265

260266

261267
def string_to_rich_text(text: str) -> Text:
262-
r"""Create a Text object from a string which can contain ANSI escape codes.
268+
r"""Create a Text object from a string which can contain ANSI style sequences.
263269
264270
This wraps rich.Text.from_ansi() to handle an issue where it removes the
265271
trailing line break from a string (e.g. "Hello\n" becomes "Hello").
@@ -323,9 +329,9 @@ def prepare_objects_for_rendering(*objects: Any) -> tuple[Any, ...]:
323329
"""Prepare a tuple of objects for printing by Rich's Console.print().
324330
325331
This function converts any non-Rich object whose string representation contains
326-
ANSI style codes into a rich.Text object. This ensures correct display width
327-
calculation, as Rich can then properly parse and account for the non-printing
328-
ANSI codes. All other objects are left untouched, allowing Rich's native
332+
ANSI escape sequences into a rich.Text object. This ensures correct display width
333+
calculation, as Rich can then properly parse and account for these non-printing
334+
codes. All other objects are left untouched, allowing Rich's native
329335
renderers to handle them.
330336
331337
:param objects: objects to prepare
@@ -342,12 +348,10 @@ def prepare_objects_for_rendering(*objects: Any) -> tuple[Any, ...]:
342348
if isinstance(renderable, ConsoleRenderable):
343349
continue
344350

345-
# Check if the object's string representation contains ANSI styles, and if so,
346-
# replace it with a Rich Text object for correct width calculation.
347351
renderable_as_str = str(renderable)
348-
renderable_as_text = string_to_rich_text(renderable_as_str)
349352

350-
if renderable_as_text.plain != renderable_as_str:
351-
object_list[i] = renderable_as_text
353+
# Check for any ANSI escape sequences in the string.
354+
if _ANSI_ESCAPE_SEQUENCE_RE.search(renderable_as_str):
355+
object_list[i] = string_to_rich_text(renderable_as_str)
352356

353357
return tuple(object_list)

cmd2/string_utils.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
11
"""Provides string utility functions.
22
33
This module offers a collection of string utility functions built on the Rich library.
4-
These utilities are designed to correctly handle strings with ANSI escape codes and
4+
These utilities are designed to correctly handle strings with ANSI style sequences and
55
full-width characters (like those used in CJK languages).
66
"""
77

8+
import re
9+
810
from rich.align import AlignMethod
911
from rich.style import StyleType
1012

1113
from . import rich_utils as ru
1214

15+
# A compiled regular expression to detect ANSI style sequences.
16+
_ANSI_STYLE_SEQUENCE_RE = re.compile(r"\x1b\[[0-9;?]*m")
17+
1318

1419
def align(
1520
val: str,
@@ -94,13 +99,15 @@ def stylize(val: str, style: StyleType) -> str:
9499

95100

96101
def strip_style(val: str) -> str:
97-
"""Strip all ANSI styles from a string.
102+
"""Strip all ANSI style sequences from a string.
103+
104+
This function uses a regular expression to efficiently remove ANSI style
105+
sequences, which are a subset of ANSI escape sequences used for text formatting.
98106
99107
:param val: string to be stripped
100108
:return: the stripped string
101109
"""
102-
text = ru.string_to_rich_text(val)
103-
return text.plain
110+
return _ANSI_STYLE_SEQUENCE_RE.sub("", val)
104111

105112

106113
def str_width(val: str) -> int:
@@ -163,4 +170,4 @@ def norm_fold(val: str) -> str:
163170
"""
164171
import unicodedata
165172

166-
return unicodedata.normalize('NFC', val).casefold()
173+
return unicodedata.normalize("NFC", val).casefold()

0 commit comments

Comments
 (0)