11import re
22from re import Match , Pattern
33
4+ from flowmark .linewrapping .tag_handling import TEMPLATE_TAG_PATTERN
5+
46# Precompiled regex patterns
57PARAGRAPH_BREAK_PATTERN : Pattern [str ] = re .compile (r"\n\s*\n" )
68
@@ -19,49 +21,15 @@ def is_multi_paragraph(text: str) -> bool:
1921 return PARAGRAPH_BREAK_PATTERN .search (text ) is not None
2022
2123
22- def smart_quotes (text : str ) -> str :
23- r"""
24- Replace straight ASCII quotes and apostrophes with typographic quotes and apostrophes
25- when this can be done safely. Aims to be conservative so it doesn't break code or
26- things that aren't language.
27-
28- Text that is wrapped in single or double quotes is replaced with typographic quotes
29- if it has whitespace or a newline at the front and is followed by whitespace or
30- a [.,?!]. The content inside quotes must not contain any of the same type (single
31- or double). Quotes containing paragraph breaks (two newlines) are left unchanged.
32-
33- Straight quotes are converted to apostrophes if they are the only straight quote
34- in the word, and have word characters on both sides:
35-
36- I'm there with "George" -> I’m there with “George”
37- "Hello," he said. -> “Hello,” he said.
38- "I know!" -> “I know!”
39-
40- Words in 'single quotes' work too -> Words in 'single quotes' work too
41-
42- I'm there -> I’m there
43- I'll be there, don't worry -> I’ll be there, don’t worry
44- X is 'foo' -> X is ‘foo’
45-
46- A few special rules to better help with English:
47-
48- Jill's -> Jill’s
49- James' -> James’
50-
51- Other patterns are unchanged:
52-
53- x="foo" -> x="foo"
54- x='foo' -> x='foo'
55- Blah'blah'blah -> Blah'blah'blah
56- ""quotes"s -> ""quotes"s
57- \"escaped\" -> \"escaped\"
58- 'apos -> 'apos
59- 'apos'trophes -> 'apos'trophes
60- $James' -> $James'
24+ def _apply_smart_quotes_to_text (text : str ) -> str :
25+ """
26+ Apply smart quote conversion to a text segment.
6127
28+ This is the core smart quotes logic, applied only to text that is NOT inside
29+ template tags.
6230 """
6331
64- # First handle quoted text - both single and double quotes
32+ # Handle quoted text - both single and double quotes
6533 def replace_quotes (match : Match [str ]) -> str :
6634 prefix = match .group (1 )
6735 double_content = match .group (2 ) # Content of double quotes
@@ -83,14 +51,11 @@ def replace_quotes(match: Match[str]) -> str:
8351
8452 result = QUOTE_PATTERN .sub (replace_quotes , text )
8553
86- # Now handle apostrophes/contractions
54+ # Handle apostrophes/contractions
8755 # Only convert single quotes that are:
8856 # 1. The only quote in the word
8957 # 2. Have word characters on both sides OR are possessives at end of words ending in s/S
9058
91- # Pattern for apostrophes: word char + ' + word char, where ' is the only quote in the word
92- # We need to be careful not to match words that have multiple quotes
93-
9459 # Split by whitespace to process words individually
9560 words = re .split (r"(\s+)" , result )
9661
@@ -115,3 +80,80 @@ def replace_quotes(match: Match[str]) -> str:
11580 words [i ] = re .sub (r"\'" , "\u2019 " , word )
11681
11782 return "" .join (words )
83+
84+
def smart_quotes(text: str) -> str:
    r"""
    Convert straight ASCII quotes and apostrophes to their typographic forms
    wherever that is safe. The conversion is deliberately conservative so that
    code and other non-language text is not damaged.

    IMPORTANT: Anything matched by `TEMPLATE_TAG_PATTERN` (Jinja/Markdoc
    `{% %}`, `{# #}`, `{{ }}`, and HTML comments `<!-- -->`) is passed through
    untouched, since rewriting quotes there would break template syntax. Only
    the text between such tags is handed to the smart-quote logic, one segment
    at a time.

    Quoted text (single or double) becomes typographic when it is preceded by
    whitespace or a newline and followed by whitespace or one of [.,?!]. The
    quoted content must not itself contain a quote of the same kind, and quotes
    spanning a paragraph break (two newlines) are left alone.

    A straight quote becomes an apostrophe when it is the only straight quote
    in its word and has word characters on both sides:

        I'm there with "George" -> I’m there with “George”
        "Hello," he said. -> “Hello,” he said.
        "I know!" -> “I know!”

        Words in 'single quotes' work too -> Words in ‘single quotes’ work too

        I'm there -> I’m there
        I'll be there, don't worry -> I’ll be there, don’t worry
        X is 'foo' -> X is ‘foo’

    A few special rules to better help with English:

        Jill's -> Jill’s
        James' -> James’

    Other patterns are unchanged:

        x="foo" -> x="foo"
        x='foo' -> x='foo'
        Blah'blah'blah -> Blah'blah'blah
        ""quotes"s -> ""quotes"s
        \"escaped\" -> \"escaped\"
        'apos -> 'apos
        'apos'trophes -> 'apos'trophes
        $James' -> $James'

    Template tag content is never modified:

        {% field kind="string" %} -> {% field kind="string" %}
        {{ variable }} -> {{ variable }}
        {# comment "here" #} -> {# comment "here" #}
        <!-- html kind="comment" --> -> <!-- html kind="comment" -->

    """
    # Walk the text left to right, alternating between plain-text stretches
    # (converted) and template tags (copied verbatim).
    pieces: list[str] = []
    cursor = 0

    for tag in TEMPLATE_TAG_PATTERN.finditer(text):
        tag_start, tag_end = tag.span()

        # Plain text sitting between the previous tag (or start) and this tag.
        plain = text[cursor:tag_start]
        if plain:
            pieces.append(_apply_smart_quotes_to_text(plain))

        # The tag itself is protected and emitted exactly as written.
        pieces.append(tag.group(0))
        cursor = tag_end

    # Whatever follows the final tag (or the whole text if there were no tags).
    tail = text[cursor:]
    if tail:
        pieces.append(_apply_smart_quotes_to_text(tail))

    return "".join(pieces)
0 commit comments