Skip to content

Commit 2557f43

Browse files
authored
Merge pull request #23 from jlevy/feature/benchmark-tooling
Fix performance regression and simplify tag handling
2 parents 704b73a + 07f84f1 commit 2557f43

16 files changed

+483
-682
lines changed

docs/project/specs/active/plan-2026-01-14-atomic-tag-wrapping.md renamed to docs/project/specs/done/plan-2026-01-14-atomic-tag-wrapping.md

File renamed without changes.

docs/project/specs/active/valid-2026-01-14-atomic-tag-wrapping.md renamed to docs/project/specs/done/valid-2026-01-14-atomic-tag-wrapping.md

File renamed without changes.

src/flowmark/cli.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@
5151
from dataclasses import dataclass
5252

5353
from flowmark.formats.flowmark_markdown import ListSpacing
54-
from flowmark.linewrapping.tag_handling import TagWrapping
5554
from flowmark.reformat_api import reformat_files
5655

5756

@@ -62,7 +61,6 @@ class Options:
6261
files: list[str]
6362
output: str
6463
width: int
65-
tags: TagWrapping
6664
plaintext: bool
6765
semantic: bool
6866
cleanups: bool
@@ -111,14 +109,6 @@ def _parse_args(args: list[str] | None = None) -> Options:
111109
parser.add_argument(
112110
"-p", "--plaintext", action="store_true", help="Process as plaintext (no Markdown parsing)"
113111
)
114-
parser.add_argument(
115-
"--tags",
116-
type=str,
117-
choices=["atomic", "wrap"],
118-
default="atomic",
119-
help="How to handle template tags during line wrapping: 'atomic' (default) never breaks "
120-
"tags across lines, 'wrap' allows tags to wrap like normal text",
121-
)
122112
parser.add_argument(
123113
"-s",
124114
"--semantic",
@@ -190,7 +180,6 @@ def _parse_args(args: list[str] | None = None) -> Options:
190180
files=opts.files,
191181
output=opts.output,
192182
width=opts.width,
193-
tags=TagWrapping(opts.tags),
194183
plaintext=opts.plaintext,
195184
semantic=opts.semantic,
196185
cleanups=opts.cleanups,
@@ -229,7 +218,6 @@ def main(args: list[str] | None = None) -> int:
229218
files=options.files,
230219
output=options.output,
231220
width=options.width,
232-
tags=options.tags,
233221
inplace=options.inplace,
234222
nobackup=options.nobackup,
235223
plaintext=options.plaintext,
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
"""
2+
Atomic pattern definitions for constructs that should not be broken during wrapping.
3+
4+
Each AtomicPattern defines a regex for a specific type of construct (code span, link,
5+
template tag, etc.) that should be kept together as a single token during line wrapping.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import re
11+
from dataclasses import dataclass
12+
13+
14+
@dataclass(frozen=True)
class AtomicPattern:
    """
    A named regex describing one kind of construct that must not be broken.

    For delimiter-based patterns (tags, comments), `open_delim`/`close_delim` hold
    the raw delimiters and `open_re`/`close_re` hold regex-escaped versions of them.
    For non-delimiter patterns all four are empty strings, which is also their
    default, so such patterns only need to supply `name` and `pattern`.
    """

    # Identifier for this kind of construct, e.g. "inline_code_span".
    name: str
    # Regex source text (not compiled) matching the whole construct.
    pattern: str
    # Raw opening/closing delimiters; empty for non-delimiter patterns.
    open_delim: str = ""
    close_delim: str = ""
    # Regex-escaped forms of the delimiters; empty for non-delimiter patterns.
    open_re: str = ""
    close_re: str = ""
30+
31+
32+
def _make_paired_pattern(open_re: str, close_re: str, middle_char: str) -> str:
    """
    Build the regex source for an opening tag directly followed by a closing tag.

    The result matches `open ... close`, optional whitespace, then
    `open / ... close`, so that the pair is kept together as one token.
    A `(?!\\s*/)` lookahead after the first opener ensures the first tag is an
    opening tag (not a closing one).

    `open_re` and `close_re` must already be regex-escaped. `middle_char` is the
    literal character excluded from the content of each tag; it is escaped here
    where needed so that characters with special meaning inside a character class
    (backslash, brackets, caret, hyphen) are treated literally.
    """
    # Only escape what is special inside `[...]`; other characters (such as
    # `%`, `#`, `}`) are inserted unchanged.
    excluded = re.sub(r"[\\\]\[\^\-]", lambda m: "\\" + m.group(0), middle_char)
    opening_tag = rf"{open_re}(?!\s*/)[^{excluded}]*{close_re}"
    closing_tag = rf"{open_re}\s*/[^{excluded}]*{close_re}"
    return opening_tag + r"\s*" + closing_tag
44+
45+
46+
# --- Markdown constructs (no delimiters recorded) ---------------------------

# Backtick code spans. The opening run of backticks is captured and the span
# ends at the next identical run, so multi-backtick spans like ``code`` work.
INLINE_CODE_SPAN = AtomicPattern(
    name="inline_code_span",
    pattern=r"(`+)(?:(?!\1).)+\1",
    open_delim="",
    close_delim="",
    open_re="",
    close_re="",
)

# Links in any of the forms [text](url), [text][ref], or bare [text].
MARKDOWN_LINK = AtomicPattern(
    name="markdown_link",
    pattern=r"\[[^\]]*\](?:\([^)]*\)|\[[^\]]*\])?",
    open_delim="",
    close_delim="",
    open_re="",
    close_re="",
)

# --- Jinja/Markdoc template tags: {% tag %}, {% /tag %} ---------------------

SINGLE_JINJA_TAG = AtomicPattern(
    name="single_jinja_tag",
    pattern=r"\{%.*?%\}",
    open_delim="{%",
    close_delim="%}",
    open_re=r"\{%",
    close_re=r"%\}",
)

# An opening tag followed (after optional whitespace) by a closing `/` tag.
PAIRED_JINJA_TAG = AtomicPattern(
    name="paired_jinja_tag",
    pattern=_make_paired_pattern(r"\{%", r"%\}", "%"),
    open_delim="{%",
    close_delim="%}",
    open_re=r"\{%",
    close_re=r"%\}",
)

# --- Jinja comments: {# comment #} ------------------------------------------

SINGLE_JINJA_COMMENT = AtomicPattern(
    name="single_jinja_comment",
    pattern=r"\{#.*?#\}",
    open_delim="{#",
    close_delim="#}",
    open_re=r"\{#",
    close_re=r"#\}",
)

PAIRED_JINJA_COMMENT = AtomicPattern(
    name="paired_jinja_comment",
    pattern=_make_paired_pattern(r"\{#", r"#\}", "#"),
    open_delim="{#",
    close_delim="#}",
    open_re=r"\{#",
    close_re=r"#\}",
)

# --- Jinja variables: {{ variable }} ----------------------------------------

SINGLE_JINJA_VAR = AtomicPattern(
    name="single_jinja_var",
    pattern=r"\{\{.*?\}\}",
    open_delim="{{",
    close_delim="}}",
    open_re=r"\{\{",
    close_re=r"\}\}",
)

PAIRED_JINJA_VAR = AtomicPattern(
    name="paired_jinja_var",
    pattern=_make_paired_pattern(r"\{\{", r"\}\}", "}"),
    open_delim="{{",
    close_delim="}}",
    open_re=r"\{\{",
    close_re=r"\}\}",
)

# --- HTML comments: <!-- comment --> ----------------------------------------

SINGLE_HTML_COMMENT = AtomicPattern(
    name="single_html_comment",
    pattern=r"<!--.*?-->",
    open_delim="<!--",
    close_delim="-->",
    open_re=r"<!--",
    close_re=r"-->",
)

# Written out by hand rather than via _make_paired_pattern: the comment body
# may contain single hyphens, which a one-character exclusion cannot express.
PAIRED_HTML_COMMENT = AtomicPattern(
    name="paired_html_comment",
    pattern=(
        r"<!--(?!\s*/)[^-]*(?:-[^-]+)*-->"
        r"\s*"
        r"<!--\s*/[^-]*(?:-[^-]+)*-->"
    ),
    open_delim="<!--",
    close_delim="-->",
    open_re=r"<!--",
    close_re=r"-->",
)

# --- HTML/XML tags: <tag>, </tag> -------------------------------------------

HTML_OPEN_TAG = AtomicPattern(
    name="html_open_tag",
    pattern=r"<[a-zA-Z][^>]*>",
    open_delim="",
    close_delim="",
    open_re="",
    close_re="",
)

HTML_CLOSE_TAG = AtomicPattern(
    name="html_close_tag",
    pattern=r"</[a-zA-Z][^>]*>",
    open_delim="",
    close_delim="",
    open_re="",
    close_re="",
)
164+
165+
# Every atomic pattern, listed in matching priority (most specific first).
# Alternation tries entries left to right, so each paired form has to appear
# before the single-tag form that would otherwise match just its opening tag.
ATOMIC_PATTERNS: tuple[AtomicPattern, ...] = (
    INLINE_CODE_SPAN,
    MARKDOWN_LINK,
    # Paired forms (must precede the single forms below)
    PAIRED_JINJA_TAG,
    PAIRED_JINJA_COMMENT,
    PAIRED_JINJA_VAR,
    PAIRED_HTML_COMMENT,
    # Single forms
    SINGLE_JINJA_TAG,
    SINGLE_JINJA_COMMENT,
    SINGLE_JINJA_VAR,
    SINGLE_HTML_COMMENT,
    # Plain HTML/XML tags
    HTML_OPEN_TAG,
    HTML_CLOSE_TAG,
)

# One compiled regex that is the alternation of all the patterns above.
# NOTE: INLINE_CODE_SPAN relies on a `\1` backreference, which refers to the
# first capturing group of the combined expression. That holds while it stays
# first in the tuple and no pattern listed ahead of it adds a capturing group.
ATOMIC_CONSTRUCT_PATTERN: re.Pattern[str] = re.compile(
    "|".join(entry.pattern for entry in ATOMIC_PATTERNS),
    flags=re.DOTALL,
)

src/flowmark/linewrapping/line_wrappers.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from flowmark.linewrapping.protocols import LineWrapper
88
from flowmark.linewrapping.sentence_split_regex import split_sentences_regex
99
from flowmark.linewrapping.tag_handling import (
10-
TagWrapping,
1110
add_tag_newline_handling,
1211
denormalize_adjacent_tags,
1312
)
@@ -79,7 +78,6 @@ def enhanced_wrapper(text: str, initial_indent: str, subsequent_indent: str) ->
7978

8079
def line_wrap_to_width(
8180
width: int = DEFAULT_WRAP_WIDTH,
82-
tags: TagWrapping = TagWrapping.atomic,
8381
len_fn: Callable[[str], int] = DEFAULT_LEN_FUNCTION,
8482
is_markdown: bool = False,
8583
) -> LineWrapper:
@@ -95,14 +93,13 @@ def line_wrapper(text: str, initial_indent: str, subsequent_indent: str) -> str:
9593
subsequent_indent=subsequent_indent,
9694
len_fn=len_fn,
9795
is_markdown=is_markdown,
98-
tags=tags,
9996
)
10097

10198
if is_markdown:
10299
# Apply tag newline handling first, then hard break handling
103100
# Order matters: tag handling should operate on original newlines
104101
# before hard break handling normalizes explicit breaks
105-
enhanced = add_tag_newline_handling(line_wrapper, tags=tags)
102+
enhanced = add_tag_newline_handling(line_wrapper)
106103
return _add_markdown_hard_break_handling(enhanced)
107104
else:
108105
return line_wrapper
@@ -111,7 +108,6 @@ def line_wrapper(text: str, initial_indent: str, subsequent_indent: str) -> str:
111108
def line_wrap_by_sentence(
112109
split_sentences: SentenceSplitter = split_sentences_no_min_length,
113110
width: int = DEFAULT_WRAP_WIDTH,
114-
tags: TagWrapping = TagWrapping.atomic,
115111
min_line_len: int = DEFAULT_MIN_LINE_LEN,
116112
len_fn: Callable[[str], int] = DEFAULT_LEN_FUNCTION,
117113
is_markdown: bool = False,
@@ -120,10 +116,6 @@ def line_wrap_by_sentence(
120116
Wrap lines of text to a given width but also keep sentences on their own lines.
121117
If the last line ends up shorter than `min_line_len`, it's combined with the
122118
next sentence.
123-
124-
The `tags` parameter controls template tag handling:
125-
- `atomic`: Tags are treated as indivisible tokens (never broken across lines)
126-
- `wrap`: Tags can wrap like normal text (legacy behavior with coalescing limits)
127119
"""
128120

129121
def line_wrapper(text: str, initial_indent: str, subsequent_indent: str) -> str:
@@ -152,7 +144,6 @@ def line_wrapper(text: str, initial_indent: str, subsequent_indent: str) -> str:
152144
initial_column=current_column,
153145
subsequent_offset=subsequent_indent_len,
154146
is_markdown=is_markdown,
155-
tags=tags,
156147
)
157148
# If last line is shorter than min_line_len, combine with next line.
158149
# Also handles if the first word doesn't fit.
@@ -182,7 +173,7 @@ def line_wrapper(text: str, initial_indent: str, subsequent_indent: str) -> str:
182173

183174
if is_markdown:
184175
# Apply tag newline handling first, then hard break handling
185-
enhanced = add_tag_newline_handling(line_wrapper, tags=tags)
176+
enhanced = add_tag_newline_handling(line_wrapper)
186177
return _add_markdown_hard_break_handling(enhanced)
187178
else:
188179
return line_wrapper

src/flowmark/linewrapping/markdown_filling.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
)
2222
from flowmark.linewrapping.protocols import LineWrapper
2323
from flowmark.linewrapping.sentence_split_regex import split_sentences_regex
24-
from flowmark.linewrapping.tag_handling import TagWrapping, preprocess_tag_block_spacing
24+
from flowmark.linewrapping.tag_handling import preprocess_tag_block_spacing
2525
from flowmark.linewrapping.text_filling import DEFAULT_WRAP_WIDTH
2626
from flowmark.transforms.doc_cleanups import doc_cleanups
2727
from flowmark.transforms.doc_transforms import rewrite_text_content
@@ -37,7 +37,6 @@ def fill_markdown(
3737
markdown_text: str,
3838
dedent_input: bool = True,
3939
width: int = DEFAULT_WRAP_WIDTH,
40-
tags: TagWrapping = TagWrapping.atomic,
4140
semantic: bool = False,
4241
cleanups: bool = False,
4342
smartquotes: bool = False,
@@ -61,19 +60,17 @@ def fill_markdown(
6160
With `semantic` enabled, the line breaks are wrapped approximately
6261
by sentence boundaries, to make diffs more readable.
6362
64-
The `tags` parameter controls how template tags (Markdoc, Jinja, HTML
65-
comments) are handled during wrapping:
66-
- `atomic` (default): Tags are never broken across lines
67-
- `wrap`: Tags can wrap like normal text (legacy behavior)
63+
Template tags (Markdoc, Jinja, HTML comments) are always treated atomically
64+
and never broken across lines.
6865
6966
Preserves YAML frontmatter (delimited by --- lines) if present at the
7067
beginning of the document.
7168
"""
7269
if line_wrapper is None:
7370
if semantic:
74-
line_wrapper = line_wrap_by_sentence(width=width, tags=tags, is_markdown=True)
71+
line_wrapper = line_wrap_by_sentence(width=width, is_markdown=True)
7572
else:
76-
line_wrapper = line_wrap_to_width(width=width, tags=tags, is_markdown=True)
73+
line_wrapper = line_wrap_to_width(width=width, is_markdown=True)
7774

7875
# Extract frontmatter before any processing
7976
frontmatter, content = split_frontmatter(markdown_text)

0 commit comments

Comments
 (0)