Skip to content

Commit 07f84f1

Browse files
jlevy and claude committed
Consolidate delimiter constants into AtomicPattern dataclass
- Remove module-level delimiter constants (JINJA_TAG_OPEN, etc.)
- Add required `open_delim`, `close_delim`, `open_re`, `close_re` fields to AtomicPattern
- Update tag_handling.py to use pattern properties directly (e.g., SINGLE_JINJA_TAG.open_delim)
- Remove unnecessary `or ""` guards since fields are now always strings

Each pattern now contains all its information in one place, making the code cleaner and easier to maintain.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 48d8927 commit 07f84f1

File tree

2 files changed

+95
-83
lines changed

2 files changed

+95
-83
lines changed

src/flowmark/linewrapping/atomic_patterns.py

Lines changed: 63 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,45 +3,30 @@
33
44
Each AtomicPattern defines a regex for a specific type of construct (code span, link,
55
template tag, etc.) that should be kept together as a single token during line wrapping.
6-
7-
This module also defines the canonical delimiter constants for all supported tag formats.
86
"""
97

108
from __future__ import annotations
119

1210
import re
1311
from dataclasses import dataclass
1412

15-
# Delimiter constants for template tags and comments.
16-
# Raw delimiters
17-
JINJA_TAG_OPEN = "{%"
18-
JINJA_TAG_CLOSE = "%}"
19-
JINJA_COMMENT_OPEN = "{#"
20-
JINJA_COMMENT_CLOSE = "#}"
21-
JINJA_VAR_OPEN = "{{"
22-
JINJA_VAR_CLOSE = "}}"
23-
HTML_COMMENT_OPEN = "<!--"
24-
HTML_COMMENT_CLOSE = "-->"
25-
26-
# Regex-escaped delimiters
27-
JINJA_TAG_OPEN_RE = r"\{%"
28-
JINJA_TAG_CLOSE_RE = r"%\}"
29-
JINJA_COMMENT_OPEN_RE = r"\{#"
30-
JINJA_COMMENT_CLOSE_RE = r"#\}"
31-
JINJA_VAR_OPEN_RE = r"\{\{"
32-
JINJA_VAR_CLOSE_RE = r"\}\}"
33-
HTML_COMMENT_OPEN_RE = r"<!--"
34-
HTML_COMMENT_CLOSE_RE = r"-->"
35-
3613

3714
@dataclass(frozen=True)
3815
class AtomicPattern:
3916
"""
4017
Defines a regex pattern for an atomic construct that should not be broken.
18+
19+
For delimiter-based patterns (tags, comments), `open_delim`/`close_delim` store
20+
the raw delimiters and `open_re`/`close_re` store regex-escaped versions.
21+
For non-delimiter patterns, these are empty strings.
4122
"""
4223

4324
name: str
4425
pattern: str
26+
open_delim: str
27+
close_delim: str
28+
open_re: str
29+
close_re: str
4530

4631

4732
def _make_paired_pattern(open_re: str, close_re: str, middle_char: str) -> str:
@@ -58,60 +43,91 @@ def _make_paired_pattern(open_re: str, close_re: str, middle_char: str) -> str:
5843
)
5944

6045

61-
def _make_single_tag_pattern(open_re: str, close_re: str) -> str:
62-
"""Generate a single tag pattern: opening...closing."""
63-
return rf"{open_re}.*?{close_re}"
64-
65-
6646
# Inline code spans with backticks (handles multi-backtick like ``code``)
6747
INLINE_CODE_SPAN = AtomicPattern(
6848
name="inline_code_span",
6949
pattern=r"(`+)(?:(?!\1).)+\1",
50+
open_delim="",
51+
close_delim="",
52+
open_re="",
53+
close_re="",
7054
)
7155

7256
# Markdown links: [text](url) or [text][ref] or [text]
7357
MARKDOWN_LINK = AtomicPattern(
7458
name="markdown_link",
7559
pattern=r"\[[^\]]*\](?:\([^)]*\)|\[[^\]]*\])?",
60+
open_delim="",
61+
close_delim="",
62+
open_re="",
63+
close_re="",
7664
)
7765

7866
# Jinja/Markdoc template tags: {% tag %}, {% /tag %}
7967
SINGLE_JINJA_TAG = AtomicPattern(
8068
name="single_jinja_tag",
81-
pattern=_make_single_tag_pattern(JINJA_TAG_OPEN_RE, JINJA_TAG_CLOSE_RE),
69+
pattern=r"\{%.*?%\}",
70+
open_delim="{%",
71+
close_delim="%}",
72+
open_re=r"\{%",
73+
close_re=r"%\}",
8274
)
8375

8476
PAIRED_JINJA_TAG = AtomicPattern(
8577
name="paired_jinja_tag",
86-
pattern=_make_paired_pattern(JINJA_TAG_OPEN_RE, JINJA_TAG_CLOSE_RE, "%"),
78+
pattern=_make_paired_pattern(r"\{%", r"%\}", "%"),
79+
open_delim="{%",
80+
close_delim="%}",
81+
open_re=r"\{%",
82+
close_re=r"%\}",
8783
)
8884

8985
# Jinja comments: {# comment #}
9086
SINGLE_JINJA_COMMENT = AtomicPattern(
9187
name="single_jinja_comment",
92-
pattern=_make_single_tag_pattern(JINJA_COMMENT_OPEN_RE, JINJA_COMMENT_CLOSE_RE),
88+
pattern=r"\{#.*?#\}",
89+
open_delim="{#",
90+
close_delim="#}",
91+
open_re=r"\{#",
92+
close_re=r"#\}",
9393
)
9494

9595
PAIRED_JINJA_COMMENT = AtomicPattern(
9696
name="paired_jinja_comment",
97-
pattern=_make_paired_pattern(JINJA_COMMENT_OPEN_RE, JINJA_COMMENT_CLOSE_RE, "#"),
97+
pattern=_make_paired_pattern(r"\{#", r"#\}", "#"),
98+
open_delim="{#",
99+
close_delim="#}",
100+
open_re=r"\{#",
101+
close_re=r"#\}",
98102
)
99103

100104
# Jinja variables: {{ variable }}
101105
SINGLE_JINJA_VAR = AtomicPattern(
102106
name="single_jinja_var",
103-
pattern=_make_single_tag_pattern(JINJA_VAR_OPEN_RE, JINJA_VAR_CLOSE_RE),
107+
pattern=r"\{\{.*?\}\}",
108+
open_delim="{{",
109+
close_delim="}}",
110+
open_re=r"\{\{",
111+
close_re=r"\}\}",
104112
)
105113

106114
PAIRED_JINJA_VAR = AtomicPattern(
107115
name="paired_jinja_var",
108-
pattern=_make_paired_pattern(JINJA_VAR_OPEN_RE, JINJA_VAR_CLOSE_RE, "}"),
116+
pattern=_make_paired_pattern(r"\{\{", r"\}\}", "}"),
117+
open_delim="{{",
118+
close_delim="}}",
119+
open_re=r"\{\{",
120+
close_re=r"\}\}",
109121
)
110122

111123
# HTML comments: <!-- comment -->
112124
SINGLE_HTML_COMMENT = AtomicPattern(
113125
name="single_html_comment",
114-
pattern=_make_single_tag_pattern(HTML_COMMENT_OPEN_RE, HTML_COMMENT_CLOSE_RE),
126+
pattern=r"<!--.*?-->",
127+
open_delim="<!--",
128+
close_delim="-->",
129+
open_re=r"<!--",
130+
close_re=r"-->",
115131
)
116132

117133
PAIRED_HTML_COMMENT = AtomicPattern(
@@ -121,17 +137,29 @@ def _make_single_tag_pattern(open_re: str, close_re: str) -> str:
121137
r"\s*"
122138
r"<!--\s*/[^-]*(?:-[^-]+)*-->"
123139
),
140+
open_delim="<!--",
141+
close_delim="-->",
142+
open_re=r"<!--",
143+
close_re=r"-->",
124144
)
125145

126146
# HTML/XML tags: <tag>, </tag>
127147
HTML_OPEN_TAG = AtomicPattern(
128148
name="html_open_tag",
129149
pattern=r"<[a-zA-Z][^>]*>",
150+
open_delim="",
151+
close_delim="",
152+
open_re="",
153+
close_re="",
130154
)
131155

132156
HTML_CLOSE_TAG = AtomicPattern(
133157
name="html_close_tag",
134158
pattern=r"</[a-zA-Z][^>]*>",
159+
open_delim="",
160+
close_delim="",
161+
open_re="",
162+
close_re="",
135163
)
136164

137165
# All patterns in priority order (more specific patterns first).

src/flowmark/linewrapping/tag_handling.py

Lines changed: 32 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,6 @@
1414
import re
1515

1616
from flowmark.linewrapping.atomic_patterns import (
17-
HTML_COMMENT_CLOSE,
18-
HTML_COMMENT_CLOSE_RE,
19-
HTML_COMMENT_OPEN,
20-
HTML_COMMENT_OPEN_RE,
21-
JINJA_COMMENT_CLOSE,
22-
JINJA_COMMENT_CLOSE_RE,
23-
JINJA_COMMENT_OPEN,
24-
JINJA_COMMENT_OPEN_RE,
25-
JINJA_TAG_CLOSE,
26-
JINJA_TAG_CLOSE_RE,
27-
JINJA_TAG_OPEN,
28-
JINJA_TAG_OPEN_RE,
29-
JINJA_VAR_CLOSE,
30-
JINJA_VAR_CLOSE_RE,
31-
JINJA_VAR_OPEN,
32-
JINJA_VAR_OPEN_RE,
3317
PAIRED_HTML_COMMENT,
3418
PAIRED_JINJA_COMMENT,
3519
PAIRED_JINJA_TAG,
@@ -74,18 +58,18 @@
7458
# Pattern to detect adjacent tags (closing tag immediately followed by opening tag)
7559
# This handles cases like %}{% or --><!-- where there's no space between
7660
_adjacent_tags_re: re.Pattern[str] = re.compile(
77-
rf"({JINJA_TAG_CLOSE_RE})({JINJA_TAG_OPEN_RE})|"
78-
rf"({JINJA_COMMENT_CLOSE_RE})({JINJA_COMMENT_OPEN_RE})|"
79-
rf"({JINJA_VAR_CLOSE_RE})({JINJA_VAR_OPEN_RE})|"
80-
rf"({HTML_COMMENT_CLOSE_RE})({HTML_COMMENT_OPEN_RE})"
61+
rf"({SINGLE_JINJA_TAG.close_re})({SINGLE_JINJA_TAG.open_re})|"
62+
rf"({SINGLE_JINJA_COMMENT.close_re})({SINGLE_JINJA_COMMENT.open_re})|"
63+
rf"({SINGLE_JINJA_VAR.close_re})({SINGLE_JINJA_VAR.open_re})|"
64+
rf"({SINGLE_HTML_COMMENT.close_re})({SINGLE_HTML_COMMENT.open_re})"
8165
)
8266

8367
# Pattern to remove spaces between adjacent tags that were added during word splitting
8468
_denormalize_tags_re: re.Pattern[str] = re.compile(
85-
rf"({JINJA_TAG_CLOSE_RE}) ({JINJA_TAG_OPEN_RE})|"
86-
rf"({JINJA_COMMENT_CLOSE_RE}) ({JINJA_COMMENT_OPEN_RE})|"
87-
rf"({JINJA_VAR_CLOSE_RE}) ({JINJA_VAR_OPEN_RE})|"
88-
rf"({HTML_COMMENT_CLOSE_RE}) ({HTML_COMMENT_OPEN_RE})"
69+
rf"({SINGLE_JINJA_TAG.close_re}) ({SINGLE_JINJA_TAG.open_re})|"
70+
rf"({SINGLE_JINJA_COMMENT.close_re}) ({SINGLE_JINJA_COMMENT.open_re})|"
71+
rf"({SINGLE_JINJA_VAR.close_re}) ({SINGLE_JINJA_VAR.open_re})|"
72+
rf"({SINGLE_HTML_COMMENT.close_re}) ({SINGLE_HTML_COMMENT.open_re})"
8973
)
9074

9175

@@ -136,18 +120,18 @@ def _is_tag_only_line(line: str) -> bool:
136120

137121
# Check if it starts with a tag
138122
starts_tag = (
139-
stripped.startswith(JINJA_TAG_OPEN)
140-
or stripped.startswith(JINJA_COMMENT_OPEN)
141-
or stripped.startswith(JINJA_VAR_OPEN)
142-
or stripped.startswith(HTML_COMMENT_OPEN)
123+
stripped.startswith(SINGLE_JINJA_TAG.open_delim)
124+
or stripped.startswith(SINGLE_JINJA_COMMENT.open_delim)
125+
or stripped.startswith(SINGLE_JINJA_VAR.open_delim)
126+
or stripped.startswith(SINGLE_HTML_COMMENT.open_delim)
143127
)
144128

145129
# Check if it ends with a tag
146130
ends_tag = (
147-
stripped.endswith(JINJA_TAG_CLOSE)
148-
or stripped.endswith(JINJA_COMMENT_CLOSE)
149-
or stripped.endswith(JINJA_VAR_CLOSE)
150-
or stripped.endswith(HTML_COMMENT_CLOSE)
131+
stripped.endswith(SINGLE_JINJA_TAG.close_delim)
132+
or stripped.endswith(SINGLE_JINJA_COMMENT.close_delim)
133+
or stripped.endswith(SINGLE_JINJA_VAR.close_delim)
134+
or stripped.endswith(SINGLE_HTML_COMMENT.close_delim)
151135
)
152136

153137
return starts_tag and ends_tag
@@ -214,13 +198,13 @@ def line_ends_with_tag(line: str) -> bool:
214198
return False
215199
# Check for Jinja-style tags
216200
if (
217-
stripped.endswith(JINJA_TAG_CLOSE)
218-
or stripped.endswith(JINJA_COMMENT_CLOSE)
219-
or stripped.endswith(JINJA_VAR_CLOSE)
201+
stripped.endswith(SINGLE_JINJA_TAG.close_delim)
202+
or stripped.endswith(SINGLE_JINJA_COMMENT.close_delim)
203+
or stripped.endswith(SINGLE_JINJA_VAR.close_delim)
220204
):
221205
return True
222206
# Check for HTML comments
223-
if stripped.endswith(HTML_COMMENT_CLOSE):
207+
if stripped.endswith(SINGLE_HTML_COMMENT.close_delim):
224208
return True
225209
return False
226210

@@ -232,13 +216,13 @@ def line_starts_with_tag(line: str) -> bool:
232216
return False
233217
# Check for Jinja-style tags
234218
if (
235-
stripped.startswith(JINJA_TAG_OPEN)
236-
or stripped.startswith(JINJA_COMMENT_OPEN)
237-
or stripped.startswith(JINJA_VAR_OPEN)
219+
stripped.startswith(SINGLE_JINJA_TAG.open_delim)
220+
or stripped.startswith(SINGLE_JINJA_COMMENT.open_delim)
221+
or stripped.startswith(SINGLE_JINJA_VAR.open_delim)
238222
):
239223
return True
240224
# Check for HTML comments
241-
if stripped.startswith(HTML_COMMENT_OPEN):
225+
if stripped.startswith(SINGLE_HTML_COMMENT.open_delim):
242226
return True
243227
return False
244228

@@ -444,10 +428,10 @@ def _fix_closing_tag_spacing(text: str) -> str:
444428
# where a multi-line opening tag ends and a closing tag follows on the same line.
445429
# Uses named group "closing_tag" to capture the start of the closing tag.
446430
_multiline_closing_pattern: re.Pattern[str] = re.compile(
447-
rf"{JINJA_TAG_CLOSE_RE}\s*(?P<closing_tag>{JINJA_TAG_OPEN_RE}\s*/)|" # %}{% /
448-
rf"{JINJA_COMMENT_CLOSE_RE}\s*(?P<closing_comment>{JINJA_COMMENT_OPEN_RE}\s*/)|" # #}{# /
449-
rf"{JINJA_VAR_CLOSE_RE}\s*(?P<closing_var>{JINJA_VAR_OPEN_RE}\s*/)|" # }}{{ /
450-
rf"{HTML_COMMENT_CLOSE_RE}\s*(?P<closing_html>{HTML_COMMENT_OPEN_RE}\s*/)" # --><!-- /
431+
rf"{SINGLE_JINJA_TAG.close_re}\s*(?P<closing_tag>{SINGLE_JINJA_TAG.open_re}\s*/)|"
432+
rf"{SINGLE_JINJA_COMMENT.close_re}\s*(?P<closing_comment>{SINGLE_JINJA_COMMENT.open_re}\s*/)|"
433+
rf"{SINGLE_JINJA_VAR.close_re}\s*(?P<closing_var>{SINGLE_JINJA_VAR.open_re}\s*/)|"
434+
rf"{SINGLE_HTML_COMMENT.close_re}\s*(?P<closing_html>{SINGLE_HTML_COMMENT.open_re}\s*/)"
451435
)
452436

453437

@@ -490,10 +474,10 @@ def _fix_multiline_opening_tag_with_closing(text: str) -> str:
490474
# Only process lines that are continuations (don't start with a tag opener).
491475
# If a line starts with a tag opener, the tag began on that line, not a continuation.
492476
is_tag_start = (
493-
stripped.startswith(JINJA_TAG_OPEN)
494-
or stripped.startswith(JINJA_COMMENT_OPEN)
495-
or stripped.startswith(JINJA_VAR_OPEN)
496-
or stripped.startswith(HTML_COMMENT_OPEN)
477+
stripped.startswith(SINGLE_JINJA_TAG.open_delim)
478+
or stripped.startswith(SINGLE_JINJA_COMMENT.open_delim)
479+
or stripped.startswith(SINGLE_JINJA_VAR.open_delim)
480+
or stripped.startswith(SINGLE_HTML_COMMENT.open_delim)
497481
)
498482

499483
if not is_tag_start:

0 commit comments

Comments
 (0)