Skip to content

Commit 5ec2a0c

Browse files
jlevy and claude committed
Add preprocessing to ensure blank lines around block content in tags
When lists or tables appear inside tags without blank lines, the CommonMark parser uses lazy continuation to incorrectly merge tags with block content. This caused closing tags to be absorbed into list items.

The fix adds preprocessing before Markdown parsing to insert blank lines:
- After opening tag lines when followed by block content (lists/tables)
- Before closing tag lines when preceded by block content

This ensures proper parsing and prevents tags from being merged with lists.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 99d5bc1 commit 5ec2a0c

File tree

7 files changed

+271
-21
lines changed

7 files changed

+271
-21
lines changed

src/flowmark/linewrapping/markdown_filling.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
)
2222
from flowmark.linewrapping.protocols import LineWrapper
2323
from flowmark.linewrapping.sentence_split_regex import split_sentences_regex
24-
from flowmark.linewrapping.tag_handling import TagWrapping
24+
from flowmark.linewrapping.tag_handling import TagWrapping, preprocess_tag_block_spacing
2525
from flowmark.linewrapping.text_filling import DEFAULT_WRAP_WIDTH
2626
from flowmark.transforms.doc_cleanups import doc_cleanups
2727
from flowmark.transforms.doc_transforms import rewrite_text_content
@@ -87,6 +87,11 @@ def fill_markdown(
8787

8888
markdown_text = markdown_text.strip() + "\n"
8989

90+
# Preprocess: ensure proper blank lines around block content within tags.
91+
# This must happen before parsing to prevent CommonMark lazy continuation
92+
# from incorrectly merging tags with lists/tables.
93+
markdown_text = preprocess_tag_block_spacing(markdown_text)
94+
9095
# Parse and render.
9196
marko = flowmark_markdown(line_wrapper, list_spacing)
9297
document = marko.parse(markdown_text)

src/flowmark/linewrapping/tag_handling.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,92 @@ def get_tag_coalescing_patterns(max_words: int = MAX_TAG_WORDS) -> list[tuple[st
266266
]
267267

268268

269+
def _is_tag_only_line(line: str) -> bool:
    """
    Report whether `line` is a standalone tag line rather than content with an inline tag.

    After surrounding whitespace is stripped, the line must begin with one of the
    recognized opening delimiters and end with one of the recognized closing
    delimiters:
    - `{% field %}` qualifies (tag-only line)
    - `- [ ] Item {% #id %}` does not (it begins with list content)

    Blank lines never qualify. Only the two ends of the line are inspected; the
    text between the delimiters is not examined.
    """
    content = line.strip()
    if not content:
        return False

    opening_delimiters = (
        JINJA_TAG_OPEN,
        JINJA_COMMENT_OPEN,
        JINJA_VAR_OPEN,
        HTML_COMMENT_OPEN,
    )
    closing_delimiters = (
        JINJA_TAG_CLOSE,
        JINJA_COMMENT_CLOSE,
        JINJA_VAR_CLOSE,
        HTML_COMMENT_CLOSE,
    )

    # str.startswith / str.endswith accept a tuple and match any member.
    if not content.startswith(opening_delimiters):
        return False
    return content.endswith(closing_delimiters)
299+
300+
301+
def _code_fence_run(line: str) -> tuple[str, int, str]:
    """
    Split a potential fenced-code delimiter line into `(fence_char, run_length, rest)`.

    A delimiter is a run of three or more backticks or tildes preceded by at most
    three spaces of indentation. `rest` is whatever follows the run (the info
    string on an opening fence). Returns `("", 0, "")` for any other line.
    """
    body = line.lstrip(" ")
    if len(line) - len(body) > 3:
        return "", 0, ""

    fence_char = body[:1]
    if fence_char not in ("`", "~"):
        return "", 0, ""

    run_length = len(body) - len(body.lstrip(fence_char))
    if run_length < 3:
        return "", 0, ""
    return fence_char, run_length, body[run_length:]


def preprocess_tag_block_spacing(text: str) -> str:
    """
    Preprocess text to ensure proper blank lines around block content within tags.

    When block content (lists, tables) appears directly after an opening tag or
    directly before a closing tag, the CommonMark parser may use lazy continuation
    to merge them incorrectly. This function inserts blank lines to prevent this.

    This preprocessing must happen BEFORE Markdown parsing, as the parser's
    structure cannot be fixed after the fact.

    Lines inside fenced code blocks (``` or ~~~) are left untouched: their content
    is literal, so inserting a blank line there would alter the code sample.

    Example transformation:
        {% field %}
        - item 1
        - item 2
        {% /field %}

    Becomes:
        {% field %}

        - item 1
        - item 2

        {% /field %}

    Args:
        text: The Markdown source, with lines separated by "\\n".

    Returns:
        The text with blank lines inserted where needed. The input is returned
        unchanged when it contains no tag-only lines.
    """
    lines = text.split("\n")

    # Fast path: without any tag-only line there is nothing to separate.
    if not any(_is_tag_only_line(line) for line in lines):
        return text

    result_lines: list[str] = []

    # Fence state: `fence_char` is non-empty while an opened fence is still unclosed.
    fence_char = ""
    fence_length = 0

    for i, line in enumerate(lines):
        # A non-empty `fence_char` here means the previous line left a fence open,
        # so the gap between the previous line and this one lies inside the code
        # block and must not be modified.
        if i > 0 and not fence_char:
            prev_line = lines[i - 1]

            # At most one blank line is inserted per gap, and never when a blank
            # line is already there. The two situations that need one:
            #   1. a tag-only line followed by block content (list/table)
            #   2. block content followed by a tag-only line
            if prev_line.strip() and (
                (_is_tag_only_line(prev_line) and line_is_block_content(line))
                or (line_is_block_content(prev_line) and _is_tag_only_line(line))
            ):
                result_lines.append("")

        result_lines.append(line)

        # Update the fence state after emitting the line.
        run_char, run_length, rest = _code_fence_run(line)
        if fence_char:
            # A closing fence uses the same character, is at least as long as the
            # opening fence, and carries nothing but whitespace after the run.
            if run_char == fence_char and run_length >= fence_length and not rest.strip():
                fence_char = ""
        elif run_char and not (run_char == "`" and "`" in rest):
            # Opening fence. A backtick fence whose info string contains a backtick
            # is not a fence (it would be an inline code span).
            fence_char, fence_length = run_char, run_length

    return "\n".join(result_lines)
353+
354+
269355
def line_ends_with_tag(line: str) -> bool:
270356
"""Check if a line ends with a Jinja/Markdoc tag or HTML comment."""
271357
stripped = line.rstrip()

tests/test_tag_formatting.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,3 +507,150 @@ def test_html_comment_multiline_closing():
507507

508508
# Closing comment should be on its own line
509509
assert "-->\n<!-- /f:field -->" in result, f"HTML closing tag not split: {result}"
510+
511+
512+
def test_preprocess_tag_block_spacing_lists():
    """
    A list placed directly between tags gets a blank line on each side.

    Without that spacing, CommonMark lazy continuation merges the tags into the list.
    """
    from flowmark.linewrapping.tag_handling import preprocess_tag_block_spacing

    # The tags touch the list directly: no blank lines anywhere in the input.
    text = dedent("""
        {% field kind="single_select" id="choice" %}
        - [ ] Option A {% #a %}
        - [ ] Option B {% #b %}
        {% /field %}
        """).strip()

    result = preprocess_tag_block_spacing(text)

    # The opening tag survives and is now separated from the first item.
    assert "{% field" in result
    assert "%}\n\n-" in result, f"Missing blank line after opening tag: {result}"

    # The closing tag is preceded by a blank line.
    assert "\n\n{% /field" in result, f"Missing blank line before closing tag: {result}"
538+
539+
540+
def test_preprocess_tag_block_spacing_tables():
    """A table placed directly between tags gets a blank line on each side."""
    from flowmark.linewrapping.tag_handling import preprocess_tag_block_spacing

    # The tags touch the table directly: no blank lines anywhere in the input.
    text = dedent("""
        {% table id="data" %}
        | A | B |
        |---|---|
        | 1 | 2 |
        {% /table %}
        """).strip()

    result = preprocess_tag_block_spacing(text)

    # Opening tag and first table row are now separated.
    assert "%}\n\n|" in result, f"Missing blank line after opening tag: {result}"

    # Last table row and closing tag are now separated.
    assert "|\n\n{% /table" in result, f"Missing blank line before closing tag: {result}"
562+
563+
564+
def test_preprocess_tag_block_spacing_already_spaced():
    """Input that already has blank lines around the list must not gain more."""
    from flowmark.linewrapping.tag_handling import preprocess_tag_block_spacing

    # Blank lines are already present after the opening tag and before the closing tag.
    text = dedent("""
        {% field kind="select" %}

        - Option 1
        - Option 2

        {% /field %}
        """).strip()

    result = preprocess_tag_block_spacing(text)

    # Three consecutive newlines would mean a second blank line was inserted.
    assert "\n\n\n" not in result, f"Extra blank lines added: {result}"
584+
585+
586+
def test_preprocess_tag_block_spacing_inline_tags():
    """List items that end with an inline tag are not treated as tag-only lines."""
    from flowmark.linewrapping.tag_handling import preprocess_tag_block_spacing

    # Every item carries a trailing inline tag; the items must stay adjacent.
    text = dedent("""
        {% field %}

        - Item 1 {% #item1 %}
        - Item 2 {% #item2 %}

        {% /field %}
        """).strip()

    result = preprocess_tag_block_spacing(text)

    # The two items are still separated by exactly one newline.
    assert "{% #item1 %}\n- Item 2" in result, f"Incorrectly added blank between items: {result}"
606+
607+
608+
def test_fill_markdown_with_list_in_tags():
    """
    Integration test: `fill_markdown` keeps tags and an enclosed list separate.

    Regression test for the bug where CommonMark lazy continuation merged the
    closing tag into the last list item.
    """
    text = dedent("""
        {% field kind="single_select" id="rating" %}
        - [ ] G {% #g %}
        - [ ] PG {% #pg %}
        - [x] R {% #r %}
        {% /field %}
        """).strip()

    result = fill_markdown(text, semantic=True)

    assert "{% field" in result
    lines = result.strip().split("\n")

    # The opening tag comes first and is followed by a blank line.
    assert lines[0].startswith("{% field")
    assert lines[1] == "", f"Expected blank line after opening tag, got: {lines[1]}"

    # Locate the first list item; the three items must sit on consecutive lines.
    list_start = next(
        (idx for idx, candidate in enumerate(lines) if candidate.startswith("- ")),
        None,
    )
    assert list_start is not None
    assert lines[list_start].startswith("- [ ] G")
    assert lines[list_start + 1].startswith("- [ ] PG")
    assert lines[list_start + 2].startswith("- ")

    # Locate the closing tag; the line just before it must be blank.
    closing_idx = next(
        (idx for idx, candidate in enumerate(lines) if candidate.strip() == "{% /field %}"),
        None,
    )
    assert closing_idx is not None
    assert lines[closing_idx - 1] == "", "Expected blank line before closing tag"

tests/testdocs/testdoc.expected.auto.md

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1436,13 +1436,15 @@ Content should start on a new line.
14361436
Closing tags should NOT be merged onto list item lines:
14371437
14381438
{% field kind="single_select" id="choice" label="Choice" %}
1439+
14391440
- [ ] Option A {% #option_a %}
14401441
- [ ] Option B {% #option_b %}
14411442
- [ ] Option C {% #option_c %}
14421443
14431444
{% /field %}
14441445
14451446
<!-- f:field kind="select" id="choice" -->
1447+
14461448
- Option 1 <!-- #opt1 -->
14471449
- Option 2 <!-- #opt2 -->
14481450
- Option 3 <!-- #opt3 -->
@@ -1505,7 +1507,7 @@ lines):
15051507
{% table_container id="pricing" %}
15061508
15071509
| Plan | Price | Features |
1508-
|------|-------|----------|
1510+
| --- | --- | --- |
15091511
| Free | $0 | Basic |
15101512
| Pro | $10 | Advanced |
15111513
@@ -1516,7 +1518,7 @@ Tables inside HTML comment tags without blank lines:
15161518
<!-- f:table id="comparison" -->
15171519
15181520
| Feature | Product A | Product B |
1519-
|---------|-----------|-----------|
1521+
| --- | --- | --- |
15201522
| Speed | Fast | Faster |
15211523
| Cost | Low | Medium |
15221524
@@ -1527,7 +1529,7 @@ Tables inside Jinja variable tags (edge case):
15271529
{{ table_header }}
15281530
15291531
| Column 1 | Column 2 |
1530-
|----------|----------|
1532+
| --- | --- |
15311533
| Data 1 | Data 2 |
15321534
15331535
{{ table_footer }}
@@ -1594,15 +1596,15 @@ Self-closing tags with tables:
15941596
{% divider style="double" /%}
15951597
15961598
| Before Divider | After Divider |
1597-
|----------------|---------------|
1599+
| --- | --- |
15981600
| A | B |
15991601
16001602
{% spacer height="20" /%}
16011603
16021604
<!-- separator -->
16031605
16041606
| Item | Value |
1605-
|------|-------|
1607+
| --- | --- |
16061608
| X | 1 |
16071609
16081610
<!-- end-section -->
@@ -1638,6 +1640,7 @@ Your feedback is valuable to us.
16381640
{% /description %}
16391641
16401642
{% field kind="single_select" id="rating" label="Overall Rating" required=true %}
1643+
16411644
- [ ] Excellent {% #excellent %}
16421645
- [ ] Good {% #good %}
16431646
- [ ] Fair {% #fair %}

0 commit comments

Comments
 (0)