Skip to content

Commit b2cc779

Browse files
committed
fix: Enhance linter for docxtpl extended Jinja2 syntax and JSON output
🔧 DocXTPL Jinja2 Extensions Support:
- Added support for docxtpl extended tags: {%p %}, {%tr %}, {%tc %}, {%r %}
- Recognize RichText variables: {{r variable }}
- Support docxtpl-specific tags: cellbg, colspan, hm, vm
- Updated tag patterns to handle p/tr/tc/r prefixes correctly
- Fixed tag matching logic for prefix-aware parsing

📋 Tag Handling Improvements:
- Removed 'set' from paired_tags (set doesn't need endset)
- Added 'set' to standalone_tags (correct Jinja2 behavior)
- Enhanced error messages to include docxtpl prefixes
- Updated mismatched/unclosed tag suggestions with proper syntax

📄 JSON Output Fix:
- Removed JSON truncation in template data summary
- Now shows complete template data in PDF reports
- Based on docxtpl documentation: https://docxtpl.readthedocs.io/en/latest/

🛠️ Technical Changes:
- Updated regex patterns for block_start/block_end detection
- Modified tag matching algorithm to handle prefix groups
- Enhanced structure checking for docxtpl syntax
- Improved error context and suggestions

✅ Now Supports:
- Standard Jinja2: {% if %} {% endif %}
- DocXTPL paragraphs: {%p if %} {%p endif %}
- DocXTPL table rows: {%tr for %} {%tr endfor %}
- DocXTPL table cells: {%tc colspan 2 %}
- RichText variables: {{r mytext }}
- Proper 'set' tag handling (no endset needed)
1 parent 38ee36d commit b2cc779

File tree

2 files changed

+41
-25
lines changed

2 files changed

+41
-25
lines changed

services/docx_linter.py

Lines changed: 40 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -43,27 +43,31 @@ def __init__(self):
4343
lstrip_blocks=True
4444
)
4545

46-
# Jinja tag patterns for matching analysis
46+
# Jinja tag patterns for matching analysis (including docxtpl extensions)
4747
self.tag_patterns = {
48-
'block_start': re.compile(r'{%\s*(\w+)(?:\s+[^%]*)?%}'),
49-
'block_end': re.compile(r'{%\s*end(\w+)\s*%}'),
48+
'block_start': re.compile(r'{%\s*([ptr]?)\s*(\w+)(?:\s+[^%]*)?%}'), # Include p, tr, tc, r prefixes
49+
'block_end': re.compile(r'{%\s*([ptr]?)\s*end(\w+)\s*%}'), # Include p, tr, tc, r prefixes
5050
'variable': re.compile(r'{{[^}]*}}'),
51+
'richtext_variable': re.compile(r'{{r\s+[^}]*}}'), # RichText variables
5152
'comment': re.compile(r'{#[^#]*#}'),
53+
'docxtpl_comment': re.compile(r'{#[ptr]\s+[^#]*#}'), # DocXTPL paragraph/row/cell comments
5254
'full_tag': re.compile(r'{[%{#][^}%#]*[%}#]}')
5355
}
5456

55-
# Tags that require matching end tags
57+
# Tags that require matching end tags (excluding 'set' which doesn't need endset)
5658
self.paired_tags = {
57-
'if', 'for', 'with', 'set', 'block', 'macro', 'call',
59+
'if', 'for', 'with', 'block', 'macro', 'call',
5860
'filter', 'trans', 'pluralize', 'raw', 'autoescape'
5961
}
6062

61-
# Tags that are self-contained
63+
# Tags that are self-contained (including docxtpl extensions)
6264
self.standalone_tags = {
63-
'else', 'elif', 'endif', 'endfor', 'endwith', 'endset',
64-
'endblock', 'endmacro', 'endcall', 'endfilter', 'endtrans',
65-
'endpluralize', 'endraw', 'endautoescape', 'include', 'import',
66-
'from', 'extends', 'break', 'continue'
65+
'else', 'elif', 'endif', 'endfor', 'endwith', 'endblock',
66+
'endmacro', 'endcall', 'endfilter', 'endtrans', 'endpluralize',
67+
'endraw', 'endautoescape', 'include', 'import', 'from', 'extends',
68+
'break', 'continue', 'set', # 'set' is standalone, no endset needed
69+
# DocXTPL-specific tags
70+
'cellbg', 'colspan', 'hm', 'vm' # DocXTPL special tags
6771
}
6872

6973
async def lint_docx_file(
@@ -331,19 +335,22 @@ def _check_tag_matching(self, content: str, options: LintOptions) -> List[LintEr
331335

332336
# Process opening tags
333337
for match in block_starts:
334-
tag_name = match.group(1).lower()
338+
prefix = match.group(1) or '' # p, tr, tc, r prefix (may be empty)
339+
tag_name = match.group(2).lower() if match.group(2) else match.group(1).lower()
335340
full_match = match.group(0)
336341

337342
if tag_name in self.paired_tags:
338343
tag_stack.append({
339344
'tag': tag_name,
345+
'prefix': prefix,
340346
'line': line_num,
341347
'content': full_match.strip(),
342348
'position': match.start()
343349
})
344350
elif tag_name not in self.standalone_tags:
345-
# Unknown tag
346-
errors.append(LintError(
351+
# Unknown tag (but don't flag docxtpl prefixed tags as unknown)
352+
if not prefix or prefix not in ['p', 'tr', 'tc', 'r']:
353+
errors.append(LintError(
347354
line_number=line_num,
348355
column=match.start(),
349356
error_type=LintErrorType.SYNTAX_ERROR,
@@ -355,7 +362,8 @@ def _check_tag_matching(self, content: str, options: LintOptions) -> List[LintEr
355362

356363
# Process closing tags
357364
for match in block_ends:
358-
end_tag_name = match.group(1).lower()
365+
end_prefix = match.group(1) or '' # p, tr, tc, r prefix (may be empty)
366+
end_tag_name = match.group(2).lower() if match.group(2) else match.group(1).lower()
359367
full_match = match.group(0)
360368

361369
if not tag_stack:
@@ -373,30 +381,38 @@ def _check_tag_matching(self, content: str, options: LintOptions) -> List[LintEr
373381

374382
# Check if closing tag matches the most recent opening tag
375383
expected_tag = tag_stack[-1]['tag']
376-
if end_tag_name == expected_tag:
384+
expected_prefix = tag_stack[-1].get('prefix', '')
385+
if end_tag_name == expected_tag and end_prefix == expected_prefix:
377386
tag_stack.pop() # Correct match
378387
else:
379388
# Mismatched tags
380389
opening_info = tag_stack[-1]
390+
expected_full = f"{expected_prefix}end{expected_tag}" if expected_prefix else f"end{expected_tag}"
391+
found_full = f"{end_prefix}end{end_tag_name}" if end_prefix else f"end{end_tag_name}"
381392
errors.append(LintError(
382393
line_number=line_num,
383394
column=match.start(),
384395
error_type=LintErrorType.MISMATCHED_TAG,
385-
message=f"Expected 'end{expected_tag}' but found 'end{end_tag_name}'",
396+
message=f"Expected '{expected_full}' but found '{found_full}'",
386397
context=line.strip(),
387398
tag_name=end_tag_name,
388-
suggestion=f"Change to {{% end{expected_tag} %}} or check tag nesting (opened at line {opening_info['line']})"
399+
suggestion=f"Change to {{% {expected_full} %}} or check tag nesting (opened at line {opening_info['line']})"
389400
))
390401

391402
# Check for unclosed tags
392403
for unclosed_tag in tag_stack:
404+
tag_prefix = unclosed_tag.get('prefix', '')
405+
tag_name = unclosed_tag['tag']
406+
full_tag = f"{tag_prefix}{tag_name}" if tag_prefix else tag_name
407+
close_tag = f"{tag_prefix}end{tag_name}" if tag_prefix else f"end{tag_name}"
408+
393409
errors.append(LintError(
394410
line_number=unclosed_tag['line'],
395411
error_type=LintErrorType.UNCLOSED_TAG,
396-
message=f"Unclosed '{unclosed_tag['tag']}' tag",
412+
message=f"Unclosed '{full_tag}' tag",
397413
context=unclosed_tag['content'],
398-
tag_name=unclosed_tag['tag'],
399-
suggestion=f"Add {{% end{unclosed_tag['tag']} %}} tag to close this block"
414+
tag_name=tag_name,
415+
suggestion=f"Add {{% {close_tag} %}} tag to close this block"
400416
))
401417

402418
logger.debug(f"Tag matching check found {len(errors)} errors")
@@ -428,7 +444,8 @@ def _check_template_structure(self, content: str, options: LintOptions) -> List[
428444

429445
# Process opening tags
430446
for match in block_starts:
431-
tag_name = match.group(1).lower()
447+
prefix = match.group(1) or '' # p, tr, tc, r prefix (may be empty)
448+
tag_name = match.group(2).lower() if match.group(2) else match.group(1).lower()
432449

433450
if tag_name in self.paired_tags:
434451
current_depth += 1
@@ -453,7 +470,8 @@ def _check_template_structure(self, content: str, options: LintOptions) -> List[
453470

454471
# Process closing tags
455472
for match in block_ends:
456-
end_tag_name = match.group(1).lower()
473+
end_prefix = match.group(1) or '' # p, tr, tc, r prefix (may be empty)
474+
end_tag_name = match.group(2).lower() if match.group(2) else match.group(1).lower()
457475

458476
if nesting_stack and end_tag_name == nesting_stack[-1]['tag']:
459477
nesting_stack.pop()

services/markdown_formatter.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -98,9 +98,7 @@ def _create_header(
9898
if template_data:
9999
header += "## Template Data Summary\n\n"
100100
header += "```json\n"
101-
header += json.dumps(template_data, indent=2, default=str)[:500]
102-
if len(json.dumps(template_data, default=str)) > 500:
103-
header += "\n... (truncated)"
101+
header += json.dumps(template_data, indent=2, default=str)
104102
header += "\n```\n\n"
105103

106104
return header

0 commit comments

Comments
 (0)