|
| 1 | +""" |
| 2 | +Atomic pattern definitions for constructs that should not be broken during wrapping. |
| 3 | +
|
| 4 | +Each AtomicPattern defines a regex for a specific type of construct (code span, link, |
| 5 | +template tag, etc.) that should be kept together as a single token during line wrapping. |
| 6 | +""" |
| 7 | + |
| 8 | +from __future__ import annotations |
| 9 | + |
| 10 | +import re |
| 11 | +from dataclasses import dataclass |
| 12 | + |
| 13 | + |
@dataclass(frozen=True)
class AtomicPattern:
    """
    Defines a regex pattern for an atomic construct that should not be broken.

    For delimiter-based patterns (tags, comments), `open_delim`/`close_delim` store
    the raw delimiters and `open_re`/`close_re` store regex-escaped versions.
    For non-delimiter patterns, these are empty strings, which is also their
    default, so such patterns only need to supply `name` and `pattern`.

    Instances are immutable (the dataclass is frozen).
    """

    # Identifier for the construct, e.g. "inline_code_span".
    name: str
    # Regex source (uncompiled) that matches the whole construct.
    pattern: str
    # Raw opening/closing delimiters; empty for non-delimiter patterns.
    open_delim: str = ""
    close_delim: str = ""
    # Regex-escaped forms of the delimiters; empty for non-delimiter patterns.
    open_re: str = ""
    close_re: str = ""
| 30 | + |
| 31 | + |
def _make_paired_pattern(open_re: str, close_re: str, middle_char: str) -> str:
    """
    Generate a paired tag pattern: opening + closing kept together.

    The resulting regex matches an opening tag, optional whitespace, and then a
    closing tag (a tag whose content starts with `/`). A `(?!\\s*/)` lookahead
    ensures the first tag is an opening tag (not a closing one).

    Args:
        open_re: Regex-escaped opening delimiter.
        close_re: Regex-escaped closing delimiter.
        middle_char: The character to exclude from the content of each tag. It
            is placed inside a negated character class, so characters that are
            special there are escaped before interpolation.

    Returns:
        The regex source string (not compiled).
    """
    # A raw backslash, `]`, `^` or `-` would change the meaning of the
    # character class, so escape exactly those. Ordinary characters pass
    # through untouched, which keeps the generated pattern text stable.
    excluded = re.sub(r"[\\\]\^\-]", lambda m: "\\" + m.group(0), middle_char)
    tag_content = rf"[^{excluded}]*"
    return (
        rf"{open_re}(?!\s*/){tag_content}{close_re}"
        rf"\s*"
        rf"{open_re}\s*/{tag_content}{close_re}"
    )
| 44 | + |
| 45 | + |
# Backtick-delimited inline code, including multi-backtick spans like ``code``.
# Group 1 captures the opening run of backticks; the body is one or more
# characters, none of which starts a copy of that run; the span ends with the
# same run again.
# NOTE: the numbered backreference (\1) refers to the first capturing group of
# whatever regex this source is embedded in; ATOMIC_PATTERNS lists this
# pattern first.
INLINE_CODE_SPAN = AtomicPattern(
    "inline_code_span",
    r"(`+)(?:(?!\1).)+\1",
    open_delim="",
    close_delim="",
    open_re="",
    close_re="",
)
| 55 | + |
# Markdown links in any of three shapes: [text](url), [text][ref], or a bare
# [text]. The bracketed text is required; the (...) or [...] suffix is optional.
MARKDOWN_LINK = AtomicPattern(
    "markdown_link",
    r"\[[^\]]*\](?:\([^)]*\)|\[[^\]]*\])?",
    open_delim="",
    close_delim="",
    open_re="",
    close_re="",
)
| 65 | + |
# Jinja/Markdoc template tags such as {% tag %} and {% /tag %}.
# The single form lazily matches from "{%" to the nearest "%}".
SINGLE_JINJA_TAG = AtomicPattern(
    "single_jinja_tag",
    r"\{%.*?%\}",
    open_delim="{%",
    close_delim="%}",
    open_re=r"\{%",
    close_re=r"%\}",
)

# The paired form keeps an opening tag and the closing tag that follows it
# (separated only by optional whitespace) together as one unit.
PAIRED_JINJA_TAG = AtomicPattern(
    "paired_jinja_tag",
    _make_paired_pattern(open_re=r"\{%", close_re=r"%\}", middle_char="%"),
    open_delim="{%",
    close_delim="%}",
    open_re=r"\{%",
    close_re=r"%\}",
)
| 84 | + |
# Jinja comments such as {# comment #}.
# The single form lazily matches from "{#" to the nearest "#}".
SINGLE_JINJA_COMMENT = AtomicPattern(
    "single_jinja_comment",
    r"\{#.*?#\}",
    open_delim="{#",
    close_delim="#}",
    open_re=r"\{#",
    close_re=r"#\}",
)

# The paired form: an opening comment followed (after optional whitespace) by
# a comment whose content starts with "/".
PAIRED_JINJA_COMMENT = AtomicPattern(
    "paired_jinja_comment",
    _make_paired_pattern(open_re=r"\{#", close_re=r"#\}", middle_char="#"),
    open_delim="{#",
    close_delim="#}",
    open_re=r"\{#",
    close_re=r"#\}",
)
| 103 | + |
# Jinja variable expressions such as {{ variable }}.
# The single form lazily matches from "{{" to the nearest "}}".
SINGLE_JINJA_VAR = AtomicPattern(
    "single_jinja_var",
    r"\{\{.*?\}\}",
    open_delim="{{",
    close_delim="}}",
    open_re=r"\{\{",
    close_re=r"\}\}",
)

# The paired form: an opening expression followed (after optional whitespace)
# by one whose content starts with "/"; "}" is excluded from the content.
PAIRED_JINJA_VAR = AtomicPattern(
    "paired_jinja_var",
    _make_paired_pattern(open_re=r"\{\{", close_re=r"\}\}", middle_char="}"),
    open_delim="{{",
    close_delim="}}",
    open_re=r"\{\{",
    close_re=r"\}\}",
)
| 122 | + |
# HTML comments such as <!-- comment -->.
# The single form lazily matches from "<!--" to the nearest "-->".
SINGLE_HTML_COMMENT = AtomicPattern(
    "single_html_comment",
    r"<!--.*?-->",
    open_delim="<!--",
    close_delim="-->",
    open_re=r"<!--",
    close_re=r"-->",
)

# The paired form is written out by hand rather than generated: the comment
# content may contain isolated "-" characters (each followed by at least one
# non-dash), which a single excluded character could not express.
PAIRED_HTML_COMMENT = AtomicPattern(
    "paired_html_comment",
    (
        # Opening comment: must not start with "/".
        r"<!--(?!\s*/)[^-]*(?:-[^-]+)*-->"
        # Optional whitespace between the two comments.
        r"\s*"
        # Closing comment: content starts with "/".
        r"<!--\s*/[^-]*(?:-[^-]+)*-->"
    ),
    open_delim="<!--",
    close_delim="-->",
    open_re=r"<!--",
    close_re=r"-->",
)
| 145 | + |
# HTML/XML tags: <tag ...> and </tag>.
# Opening tag: "<", an ASCII letter, then everything up to the next ">".
HTML_OPEN_TAG = AtomicPattern(
    "html_open_tag",
    r"<[a-zA-Z][^>]*>",
    open_delim="",
    close_delim="",
    open_re="",
    close_re="",
)

# Closing tag: "</", an ASCII letter, then everything up to the next ">".
HTML_CLOSE_TAG = AtomicPattern(
    "html_close_tag",
    r"</[a-zA-Z][^>]*>",
    open_delim="",
    close_delim="",
    open_re="",
    close_re="",
)
| 164 | + |
# Every atomic pattern, in priority order (more specific patterns first).
# The order matters because the patterns are later joined into one alternation:
# each paired pattern has to precede its single counterpart to match correctly.
ATOMIC_PATTERNS: tuple[AtomicPattern, ...] = (
    # Markdown constructs
    INLINE_CODE_SPAN,
    MARKDOWN_LINK,
    # Paired (opening + closing) constructs: must precede the single forms
    PAIRED_JINJA_TAG,
    PAIRED_JINJA_COMMENT,
    PAIRED_JINJA_VAR,
    PAIRED_HTML_COMMENT,
    # Single constructs
    SINGLE_JINJA_TAG,
    SINGLE_JINJA_COMMENT,
    SINGLE_JINJA_VAR,
    SINGLE_HTML_COMMENT,
    # Plain HTML/XML tags
    HTML_OPEN_TAG,
    HTML_CLOSE_TAG,
)
| 184 | + |
# Compiled regex combining all patterns with alternation, in ATOMIC_PATTERNS
# priority order. Each pattern is wrapped in a non-capturing group so that a
# pattern containing its own top-level "|" cannot bleed into its neighbours;
# non-capturing groups also leave capture-group numbering (and therefore
# numbered backreferences inside the patterns) unchanged.
# re.DOTALL lets "." match newlines, so a construct may span lines.
ATOMIC_CONSTRUCT_PATTERN: re.Pattern[str] = re.compile(
    "|".join(f"(?:{p.pattern})" for p in ATOMIC_PATTERNS),
    re.DOTALL,
)
0 commit comments