Skip to content

Commit 7c710de

Browse files
committed
feat: Enhanced DocX linter with improved error detection and reporting
Major improvements to the DocX Jinja template linter:

## Core Enhancements:
- **Stack-based tag matching**: Accurately identifies unmatched opening tags (if/for blocks)
- **Two-stage linting process**: Fast syntax check with python-docx, then full docxtpl processing
- **Extended docxtpl syntax support**: Handles {%p, {%tr, {%tc, {%r tags properly
- **Improved error location accuracy**: Reports exact line numbers of problematic tags

## Error Detection Improvements:
- Detects unmatched if/endif and for/endfor blocks
- Identifies unknown Jinja2 tags
- Handles docxtpl-specific syntax patterns
- Preserves document structure during text extraction

## Reporting Enhancements:
- **Fixed markdown table formatting**: Proper pipe character escaping (\|)
- **Complete error messages**: Displays both message and suggestion in PDF reports
- **Extended template context**: Shows 150 characters instead of 50 for better visibility
- **Debug output**: Generates intermediate JSON and markdown files for troubleshooting

## New Debug Features:
- Intermediate JSON results saved to files
- Debug markdown generated for table formatting analysis
- Structured text extraction with line numbers
- Processing time and metadata tracking

## File Structure Changes:
- Reorganized test files into test-data/ directory
- Added test templates with various error types
- Improved test data organization with separate folders

## Tested Scenarios:
✅ Clean template processing (no errors)
✅ Unmatched tag detection ({%p if without endif})
✅ Unknown tag detection ({%invalid %})
✅ Proper PDF generation for both success and error cases
✅ Accurate line number reporting
✅ Complete error message display in PDF reports
1 parent 3371ac7 commit 7c710de

20 files changed

+1418
-1531
lines changed

models/schemas.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class LintResult(BaseModel):
8383
summary: LintSummary = Field(..., description="Summary statistics")
8484
template_content: Optional[str] = Field(None, description="Extracted template content")
8585
template_preview: Optional[str] = Field(None, description="First 500 characters of template")
86+
json_result: Optional[dict] = Field(None, description="Structured JSON result from revised linter")
8687

8788
@property
8889
def has_errors(self) -> bool:

services/docx_linter.py

Lines changed: 696 additions & 431 deletions
Large diffs are not rendered by default.

services/markdown_formatter.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -152,14 +152,28 @@ def _create_issues_table(self, issues, issue_type: str) -> str:
152152
# Format template text with context
153153
template_text = "N/A"
154154
if hasattr(issue, 'context') and issue.context:
155-
template_text = f"`{self._escape_markdown(issue.context[:50])}`"
156-
if len(issue.context) > 50:
155+
# Show more context for better readability (150 chars instead of 50)
156+
context_text = issue.context[:150]
157+
escaped_context = self._escape_markdown(context_text)
158+
template_text = f"<code>{escaped_context}</code>"
159+
if len(issue.context) > 150:
157160
template_text += "..."
158161

159-
# Format description with type and suggestion
160-
description = f"**{issue.error_type if hasattr(issue, 'error_type') else issue.warning_type}**<br/>"
161-
description += self._escape_markdown(issue.message)
162+
# Format description with clean message
163+
if hasattr(issue, 'error_type'):
164+
# For errors, show the error type and message
165+
description = f"**{issue.error_type}**<br/>"
166+
description += self._escape_markdown(issue.message)
167+
else:
168+
# For warnings, just show the message without enum prefix
169+
description = "**"
170+
if issue.message and issue.message.startswith("Undefined variable:"):
171+
description += issue.message
172+
else:
173+
description += issue.message or "Warning"
174+
description += "**"
162175

176+
# Add suggestion for both errors and warnings
163177
if issue.suggestion:
164178
description += f"<br/>💡 *{self._escape_markdown(issue.suggestion)}*"
165179

@@ -191,14 +205,18 @@ def _create_template_preview(self, template_preview: str) -> str:
191205
return preview
192206

193207
def _escape_markdown(self, text: str) -> str:
194-
"""Escape special markdown characters."""
208+
"""Escape special markdown characters for markdown tables."""
195209
if not text:
196210
return ""
197211

198-
# Escape common markdown characters
199-
chars_to_escape = ['|', '*', '_', '`', '\\', '[', ']', '(', ')', '#', '+', '-', '.', '!']
212+
# For markdown tables, pipes are the most critical to escape properly
213+
# Escape backslashes first, then other characters
214+
text = text.replace('\\', '\\\\') # Escape existing backslashes first
215+
text = text.replace('|', '\\|') # Escape pipes for table formatting
200216

201-
for char in chars_to_escape:
217+
# Escape other markdown characters
218+
other_chars = ['*', '_', '`', '[', ']', '(', ')', '#', '+', '-', '.', '!']
219+
for char in other_chars:
202220
text = text.replace(char, f'\\{char}')
203221

204222
return text

test-data/test-json/farmer.json

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"farmer": {
3+
"id": "622ce1e1-1342-4ae0-bc95-e9343e05d692",
4+
"farmerCode": "FMR1753987588392126",
5+
"name": "Tariro Mudenda",
6+
"phone": "+263 76 911 8617",
7+
"countryCode": "ZW",
8+
"latitude": -18.97966353241836,
9+
"longitude": 31.9450316961498,
10+
"aez": 5,
11+
"administrativeLevels": {},
12+
"otherActivities": [],
13+
"area": 4.3,
14+
"paymentDetails": [
15+
{
16+
"type": "bank_transfer",
17+
"currency": "USD",
18+
"bank_name": "Stanbic Bank",
19+
"account_name": "Taira Mudenda",
20+
"account_number": "12341234",
21+
"branch_name_or_code": "Branch code"
22+
}
23+
],
24+
"proofOfStake": [
25+
{
26+
"id": "b456fa56-b36d-4d5a-bb69-9cf36ba207c8",
27+
"url": "/api/documents/download/documents/1754423969909_b456fa56-b36d-4d5a-bb69-9cf36ba207c8",
28+
"size": 52718,
29+
"type": "proof_of_stake",
30+
"filename": "4156450_CR (1).pdf",
31+
"mimeType": "application/pdf",
32+
"uploadedAt": "2025-08-05T19:59:29.915Z"
33+
}
34+
],
35+
"county": "Mutare",
36+
"ward": "Ward 10",
37+
"crop": "Sesame",
38+
"organizationIds": [
39+
"36596e50-2531-4645-b864-a66ca6422c25",
40+
"b085b62f-77ff-46aa-a75c-8f0a0bf06b22"
41+
],
42+
"kycStatus": "PENDING",
43+
"isActive": true,
44+
"createdAt": "2025-07-31T18:46:28.393Z",
45+
"updatedAt": "2025-08-05T19:59:31.307Z"
46+
},
47+
"organization": {
48+
"id": "3cff2706-10ed-4504-8495-b7b58fcafc0f",
49+
"name": "MyUbuntu Insurance",
50+
"code": "default"
51+
},
52+
"quotes": [
53+
{
54+
"id": "db820735-4db8-4f47-9a90-c8cc9ff06692",
55+
"number": "Q2025000016",
56+
"status": "DRAFT",
57+
"premium": "0",
58+
"coverage": "0",
59+
"createdAt": "2025-08-01T12:21:37.154Z"
60+
}
61+
],
62+
"policies": [],
63+
"summary": {
64+
"totalQuotes": 1,
65+
"activePolicies": 0,
66+
"totalCoverage": 0,
67+
"totalPremium": 0
68+
},
69+
"generatedAt": "2025-08-07T11:29:19.721Z"
70+
}

test-data/test-results-container/debug-farmer-profile-template-processed.xml

Lines changed: 3 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# 📋 DocX Jinja Template Linting Report
2+
3+
## Document Information
4+
5+
| Field | Value |
6+
|-------|-------|
7+
| **Document Name** | `farmer-profile-template.docx` |
8+
| **Report Generated** | 2025-08-07 15:10:10 |
9+
| **Template Size** | 8,523 characters |
10+
| **Lines Count** | 154 lines |
11+
| **Jinja Tags** | 179 tags |
12+
| **Processing Time** | 279.54ms |
13+
| **Completeness Score** | 100.0% |
14+
15+
## Validation Status
16+
17+
**PASSED** - Template validation successful
18+
19+
20+
## 📊 Summary
21+
22+
| Issue Type | Count |
23+
|------------|-------|
24+
| **Errors** | 0 |
25+
| ⚠️ **Warnings** | 5 |
26+
27+
💡 **Recommendations**: Consider addressing warnings to improve template quality.
28+
29+
30+
31+
<div class="page-break"></div>
32+
33+
# 🔍 Detailed Analysis
34+
35+
## ⚠️ Warnings
36+
37+
| Line | Template Text | Issue Description |
38+
|------|---------------|-------------------|
39+
| Unknown | N/A | **Undefined variable: user**<br/>💡 *Ensure 'user' is provided in template data* |
40+
| 4 | N/A | **Undefined variable: generatedAt**<br/>💡 *Ensure 'generatedAt' is provided in template data* |
41+
| 104 | N/A | **Undefined variable: systemVersion**<br/>💡 *Ensure 'systemVersion' is provided in template data* |
42+
| Unknown | N/A | **Undefined variable: farmer**<br/>💡 *Ensure 'farmer' is provided in template data* |
43+
| 154 | N/A | **Undefined variable: daysSinceRegistration**<br/>💡 *Ensure 'daysSinceRegistration' is provided in template data* |
44+
45+
46+
47+
<div class="page-break"></div>
48+
49+
# 📄 Template Preview
50+
51+
```jinja2
52+
Farmer Profile Report
53+
This is a template for creating a Word document (.docx) for comprehensive farmer profile PDF generation. Convert this markdown to a Word document and save as farmer-profile-template.docx.
54+
{{ farmer.name }} - Farmer Profile
55+
Report ID: {{ farmer.farmerCode }}-PROFILE-{{ generatedAt }}
56+
Executive Summary
57+
This comprehensive farmer profile provides detailed information about {{ farmer.name }}, including personal details, agricultural activities, location information, organization...
58+
```
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
=== STRUCTURED TEXT EXTRACTED WITH python-docx ===
2+
3+
1: Farmer Profile Report
4+
2: This is a template for creating a Word document (.docx) for comprehensive farmer profile PDF generation. Convert this markdown to a Word document and save as farmer-profile-template.docx.
5+
3: {{ farmer.name }} - Farmer Profile
6+
4: Report ID: {{ farmer.farmerCode }}-PROFILE-{{ generatedAt }}
7+
5: Executive Summary
8+
6: This comprehensive farmer profile provides detailed information about {{ farmer.name }}, including personal details, agricultural activities, location information, organizational relationships, payment methods, and supporting documentation.
9+
7: 1. Personal Information
10+
8: 2. Location Information
11+
9: Geographic Details
12+
10: Administrative Levels
13+
11: {% if farmer.administrativeLevels %}
14+
12: {% else %}
15+
13: No information on administrative levels available.
16+
14: {% endif %}
17+
15: 3. Agricultural Information
18+
16: Primary Activities
19+
17: Additional Agricultural Activities
20+
18: {% if farmer.otherActivities %}
21+
19: {% for activity in farmer.otherActivities %}
22+
20: {{ activity }}
23+
21: {% endfor %}
24+
22: {% else %}
25+
23: No additional activities specified.
26+
24: {% endif %}
27+
25: 4. Organization Information
28+
26: Primary Organization
29+
27: Additional Organizations
30+
28: {% if farmer.organizationIds %} The farmer is associated with {{ farmer.organizationIds | length }} additional organization(s) for business intelligence and credit scoring purposes.
31+
29: Organization IDs: {{ farmer.organizationIds | join(', ') }}
32+
30: {% else %}
33+
31: No additional organizational relationships.
34+
32: {% endif %}
35+
33: 5. Payment Information
36+
34: {% if farmer.paymentDetails %}
37+
35: {% for payment in farmer.paymentDetails %}
38+
36: Payment Method {{ loop.index }}
39+
37: Type: {{ payment.type | title | replace('_', ' ') }}
40+
38: {% if payment.type == 'bank_transfer' %}
41+
39: {% elif payment.type == 'mobile_money' %}
42+
40: {% endif %}
43+
41: {% endfor %}
44+
42: {% else %}
45+
43: No payment methods have been configured for this farmer.
46+
44: {% endif %}
47+
45: 6. Document Repository
48+
46: Proof of Stake/Insurable Interest
49+
47: {% if farmer.proofOfStake %}
50+
48: Total Documents: {{ farmer.proofOfStake | length }}
51+
49: {% else %}
52+
50: No proof of stake documents have been uploaded.
53+
51: {% endif %}
54+
52: Document Storage Information
55+
53: All documents are securely stored in encrypted cloud storage with access controls and audit logging. Documents can be retrieved through the farmer management system.
56+
54: 7. Farm Location Map
57+
55: {% if farmer.latitude and farmer.longitude %} GPS Coordinates: {{ farmer.latitude }}, {{ farmer.longitude }}
58+
56: Note: A detailed satellite map showing the exact farm location is available in the digital version of this report. The coordinates provided above can be used with any mapping service to locate the farm.
59+
57: Google Maps Link: Google Maps
60+
58: {% if farmer.aez %} Agro-Ecological Zone: {{ farmer.aez }} This zone classification helps determine appropriate agricultural practices and risk assessments for the region.
61+
59: {% endif %}
62+
60: {% else %} Farm location coordinates are not available. Please update the farmer profile with GPS coordinates for accurate location mapping.
63+
61: {% endif %}
64+
62: 8. KYC & Compliance Status
65+
63: Data Quality Assessment
66+
64: {% if farmer.email and farmer.latitude and farmer.primaryActivity and farmer.paymentDetails %} ✅ Excellent - All required fields are complete
67+
65: {% elif farmer.phone and farmer.name %}
68+
66: ⚠️ Good - Core information present, some optional fields missing
69+
67: {% else %}
70+
68: ❌ Needs Attention - Critical information missing
71+
69: {% endif %}
72+
70: 9. Historical Activity
73+
71: Policy History
74+
72: {% if farmer.policies and farmer.policies | length > 0 %}
75+
73: The farmer has {{ farmer.policies | length }} insurance policies in the system.
76+
74: {% else %}
77+
75: No insurance policies found for this farmer.
78+
76: {% endif %}
79+
77: Quote History
80+
78: {% if farmer.quotes and farmer.quotes | length > 0 %}
81+
79: The farmer has {{ farmer.quotes | length }} insurance quotes in the system.
82+
80: {% else %}
83+
81: No insurance quotes found for this farmer.
84+
82: {% endif %}
85+
83: 10. Risk Assessment & Recommendations
86+
84: Location Risk Factors
87+
85: {% if farmer.latitude and farmer.longitude %}
88+
86: ✅ GPS Coordinates Available - Enables precise risk modeling
89+
87: {% if farmer.aez %}
90+
88: ✅ AEZ Classification - Zone {{ farmer.aez }} agricultural practices identified
91+
89: {% else %}
92+
90: ⚠️ AEZ Pending - Agro-ecological zone classification needed
93+
91: {% endif %}
94+
92: {% else %}
95+
93: ❌ Location Missing - GPS coordinates required for risk assessment
96+
94: {% endif %}
97+
95: Data Completeness Score
98+
96: {% set completeness_score = 0 %}{% if farmer.name %}{% set completeness_score = completeness_score + 10 %}{% endif %}{% if farmer.phone %}{% set completeness_score = completeness_score + 10 %}{% endif %}{% if farmer.email %}{% set completeness_score = completeness_score + 10 %}{% endif %}{% if farmer.latitude and farmer.longitude %}{% set completeness_score = completeness_score + 15 %}{% endif %}{% if farmer.primaryActivity or farmer.crop %}{% set completeness_score = completeness_score + 15 %}{% endif %}{% if farmer.area %}{% set completeness_score = completeness_score + 10 %}{% endif %}{% if farmer.paymentDetails %}{% set completeness_score = completeness_score + 15 %}{% endif %}{% if farmer.proofOfStake %}{% set completeness_score = completeness_score + 15 %}{% endif %}
99+
97: Profile Completeness: {{ completeness_score }}%
100+
98: {% if completeness_score >= 80 %}🟢 Excellent - Profile is comprehensive and ready for all services {% elif completeness_score >= 60 %}🟡 Good - Profile is sufficient for basic services
101+
99: {% else %}🔴 Needs Improvement - Additional information required{% endif %}
102+
100: Report Footer
103+
101: Generated By: {{ user.name }} ({{ user.email }})
104+
102: Organization: {{ user.organization.name }}
105+
103: Report Type: Comprehensive Farmer Profile
106+
104: System Version: {{ systemVersion | default('v1.0') }}
107+
105: Data Privacy Notice: This document contains personal and confidential information. Handle according to your organization's data protection policies and applicable privacy regulations.
108+
106: Document Authenticity: This report was generated automatically from the farmer management system. For verification, contact {{ user.organization.name }} at {{ user.email }}.
109+
107: Field | Value
110+
108: Full Name | {{ farmer.name }}
111+
109: Farmer Code | {{ farmer.farmerCode }}
112+
110: Phone Number | {{ farmer.phone }}
113+
111: Email Address | {% if farmer.email %}{{ farmer.email }}{% else %}Not provided{% endif %}
114+
112: Registration Date | {{ farmer.createdAt }}
115+
113: Last Updated | {{ farmer.updatedAt }}
116+
114: Status | {% if farmer.isActive %}Active{% else %}Inactive{% endif %}
117+
115: Field | Value
118+
116: Country | {{ farmer.countryCode }}
119+
117: Coordinates | {% if farmer.latitude and farmer.longitude %}{{ farmer.latitude }}, {{ farmer.longitude }}{% else %}Not available{% endif %}
120+
118: AEZ (Agro-Ecological Zone) | {% if farmer.aez %}{{ farmer.aez }}{% else %}Not determined{% endif %}
121+
119: {%tr for level, value in farmer.administrativeLevels.items() %} | {%tr for level, value in farmer.administrativeLevels.items() %}
122+
120: Level {{ level }} | {{ value }}
123+
121: {%tr endfor %} | {%tr endfor %}
124+
122: Field | Value
125+
123: Primary Activity/Products | {% if farmer.primaryActivity %}{{ farmer.primaryActivity }}{% elif farmer.crop %}{{ farmer.crop }}{% else %}Not specified{% endif %}
126+
124: Farm Size | {% if farmer.area %}{{ farmer.area }} hectares{% else %}Not specified{% endif %}
127+
125: Field | Value
128+
126: Organization Name | {{ farmer.organization.name }}
129+
127: Organization Code | {{ farmer.organization.code }}
130+
128: Organization ID | {{ farmer.organization.id }}
131+
129: Field | Value
132+
130: Account Name | {{ payment.account_name }}
133+
131: Bank Name | {{ payment.bank_name }}
134+
132: Currency | {{ payment.currency }}
135+
133: {%tr if payment.branch_name_or_code %} | {%tr if payment.branch_name_or_code %}
136+
134: Branch | {{ payment.branch_name_or_code }}
137+
135: {%tr endif %} | {%tr endif %}
138+
136: Field | Value
139+
137: Mobile Number | {{ payment.registered_mobile_number }}
140+
138: Full Name | {{ payment.full_name }}
141+
139: {%tr if payment.provider %} | {%tr if payment.provider %}
142+
140: Provider | {{ payment.provider }}
143+
141: {%tr endif %} | {%tr endif %}
144+
142: Document # | Filename | Type | Upload Date | File Size
145+
143: {%tr for doc in farmer.proofOfStake %} | {%tr for doc in farmer.proofOfStake %} | {%tr for doc in farmer.proofOfStake %} | {%tr for doc in farmer.proofOfStake %} | {%tr for doc in farmer.proofOfStake %}
146+
144: {{ loop.index }} | {{ doc.filename }} | {{ doc.type }} | {{ doc.uploadedAt }} | {{ doc.size | filesizeformat }}
147+
145: {%tr endfor %} | {%tr endfor %} | {%tr endfor %} | {%tr endfor %} | {%tr endfor %}
148+
146: Field | Value
149+
147: KYC Status | {% if farmer.kycStatus %}{{ farmer.kycStatus }}{% else %}Pending{% endif %}
150+
148: Account Status | {% if farmer.isActive %}Active{% else %}Inactive{% endif %}
151+
149: Data Completeness | {% if farmer.email and farmer.latitude and farmer.primaryActivity %}Complete{% else %}
152+
150: Needs Update{% endif %}
153+
151: Field | Value
154+
152: Member Since | {{ farmer.createdAt }}
155+
153: Profile Last Updated | {{ farmer.updatedAt }}
156+
154: Days as Member | {{ daysSinceRegistration }}
157+
158+
=== SUMMARY ===
159+
Total lines: 154
160+
Total characters: 8523

0 commit comments

Comments
 (0)