Skip to content

Commit 6a1d640

Browse files
authored
Merge pull request #35 from mxtoai/fix-output
Fix output formatting, have factcheck to include additional summary, …
2 parents d54f231 + 08c3681 commit 6a1d640

File tree

10 files changed

+251
-152
lines changed

10 files changed

+251
-152
lines changed

mxtoai/agents/email_agent.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ def _initialize_search_tools(self) -> SearchWithFallbackTool:
160160
161161
Returns:
162162
SearchWithFallbackTool: The configured search tool.
163+
163164
"""
164165
ddg_search_tool = WebSearchTool(engine="duckduckgo", max_results=5)
165166
logger.debug("Initialized WebSearchTool with DuckDuckGo engine.")
@@ -171,13 +172,13 @@ def _initialize_search_tools(self) -> SearchWithFallbackTool:
171172
# No need to log here as _initialize_google_search_tool does it.
172173

173174
primary_search_engines: list[Tool] = []
174-
if ddg_search_tool: # ddg_search_tool is always initialized
175+
if ddg_search_tool: # ddg_search_tool is always initialized
175176
primary_search_engines.append(ddg_search_tool)
176-
if brave_search_tool: # brave_search_tool might be None if API key is missing
177+
if brave_search_tool: # brave_search_tool might be None if API key is missing
177178
primary_search_engines.append(brave_search_tool)
178179

179180
if not primary_search_engines:
180-
logger.warning(
181+
logger.warning(
181182
"No primary search engines (DuckDuckGo, Brave) could be initialized for SearchWithFallbackTool."
182183
)
183184

mxtoai/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from fastapi import Depends, FastAPI, File, Form, HTTPException, Response, UploadFile, status
1414
from fastapi.security import APIKeyHeader
1515

16+
from mxtoai import validators
1617
from mxtoai._logging import get_logger
1718
from mxtoai.agents.email_agent import EmailAgent
1819
from mxtoai.config import ATTACHMENTS_DIR, SKIP_EMAIL_DELIVERY
@@ -33,7 +34,6 @@
3334
validate_email_whitelist,
3435
validate_rate_limits,
3536
)
36-
from mxtoai import validators
3737

3838
# Load environment variables
3939
load_dotenv()

mxtoai/prompts/base_prompts.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
MARKDOWN FORMATTING REQUIREMENTS:
77
- **bold** for emphasis
88
- _italics_ for quotes
9-
- ### for section headers (if needed)
9+
- Strictly use ### for section headers
1010
- Proper bullet points and numbered lists
1111
- Clear paragraph spacing
1212
"""
@@ -18,7 +18,8 @@
1818
- Include only relevant information
1919
- Maintain appropriate tone and style
2020
- Use proper spacing and formatting
21-
- ALWAYS Indent each nested level with two spaces
21+
- Try to maintain visual hierarchy of the response using section headers and lists
22+
- Do not add numbering in front of section headers
2223
- DO NOT add any signature - it will be added automatically
2324
- If web search tools were used, create a 'References' section at the end of your response. List the titles and URLs of the web pages used, formatted as markdown links (e.g., `1. [Page Title](URL)`).
2425
"""

mxtoai/prompts/output_prompts.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
# Ask handler output guidelines
3838
ASK_OUTPUT_GUIDELINES = """
3939
Output Format Guidelines:
40-
1. Begin with acknowledgment of the question
40+
1. Begin with acknowledgment of the question at the top of the response.
4141
2. Structure response with clear sections
4242
3. Use examples to illustrate complex points
4343
4. Include actionable recommendations when applicable
@@ -47,11 +47,12 @@
4747
# Fact-check handler output guidelines
4848
FACT_CHECK_OUTPUT_GUIDELINES = """
4949
Output Format Guidelines:
50-
1. Present each claim in this format:
50+
1. Present a short summary of the original email to set up the context.
51+
2. Present each claim in this format:
5152
- **Claim**: [Original statement]
5253
- **Status**: [Verified ✓ / Not verified ❌ / Partially verified ⚠️]
5354
- **Evidence**: [Supporting information]
54-
- **Sources**: [Citations with links]
55+
- **Sources**: [Citations with links, make sure the links are valid]
5556
3. Use consistent status symbols throughout
5657
"""
5758

mxtoai/prompts/template_prompts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@
8989
1. State each claim clearly
9090
2. Provide verification status
9191
3. Include supporting evidence
92-
4. Cite reliable sources
92+
4. Cite reliable sources with actual links to the source
9393
5. Note any uncertainties
9494
6. Always give a disclaimer that sometimes links may be outdated or incorrect depending on the age of the source
9595
"""

mxtoai/scripts/report_formatter.py

Lines changed: 111 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import re
44
from typing import Any, Optional
55

6+
import markdown2
67
from jinja2 import Environment, FileSystemLoader, select_autoescape
78

89
from mxtoai._logging import get_logger
@@ -36,11 +37,11 @@ def __init__(self, template_dir: Optional[str] = None):
3637
# Default signature
3738
self.signature_block = """
3839
39-
---
40+
<hr style="margin: 2em 0; border: none; border-top: 1px solid #ddd;">
4041
41-
**MXtoAI Assistant**
42+
<p><strong>MXtoAI Assistant</strong></p>
4243
43-
_Feel free to reply to this email to continue our conversation._
44+
<p><em>Feel free to reply to this email to continue our conversation.</em></p>
4445
"""
4546

4647
def _init_template_env(self):
@@ -226,7 +227,7 @@ def _to_plain_text(self, markdown: str) -> str:
226227

227228
def _to_html(self, markdown_content: str, theme: str = "default") -> str:
228229
"""
229-
Convert markdown to HTML using templates and themes.
230+
Convert markdown to HTML using markdown2 for robust AI-generated content handling.
230231
231232
Args:
232233
markdown_content: Markdown content
@@ -236,55 +237,116 @@ def _to_html(self, markdown_content: str, theme: str = "default") -> str:
236237
HTML version
237238
238239
"""
239-
try:
240-
import markdown as md_converter
241-
from markdown.extensions.attr_list import AttrListExtension
242-
from markdown.extensions.fenced_code import FencedCodeExtension
243-
from markdown.extensions.nl2br import Nl2BrExtension
244-
from markdown.extensions.sane_lists import SaneListExtension
245-
from markdown.extensions.tables import TableExtension
246-
from markdown.extensions.toc import TocExtension
247-
248-
# Pre-process to ensure lists following non-empty lines have a preceding blank line
249-
markdown_content = re.sub(r'([^\n])\n(\s*(?:[-*+]|\d+\.)[ \t])', r'\1\n\n\2', markdown_content)
250-
251-
# Configure extensions with specific settings
252-
extensions = [
253-
TableExtension(), # Support for tables
254-
FencedCodeExtension(), # Support for fenced code blocks
255-
SaneListExtension(), # Better list handling
256-
Nl2BrExtension(), # Convert newlines to line breaks
257-
TocExtension(permalink=False), # Table of contents support without permalinks
258-
AttrListExtension(), # Support for attributes
259-
]
260-
261-
# Convert markdown to HTML with configured extensions
262-
html_content = md_converter.markdown(
263-
markdown_content,
264-
extensions=extensions,
265-
extension_configs={
266-
# Explicitly disable footnotes if it's a default or separate extension
267-
# 'markdown.extensions.footnotes': {'PLACE_MARKER': '!!!!FOOTNOTES!!!!'}
268-
},
269-
output_format="html5", # Use html5 for better compatibility
270-
)
240+
# Pre-process markdown to fix issues not handled by markdown2
241+
markdown_content = self._fix_ai_markdown(markdown_content)
242+
243+
# Convert markdown to HTML with markdown2 (robust for AI content)
244+
html_content = markdown2.markdown(
245+
markdown_content,
246+
extras=[
247+
"fenced-code-blocks", # Support for ```code``` blocks
248+
"tables", # Support for tables
249+
"strike", # Support for ~~strikethrough~~
250+
"cuddled-lists", # Better list handling (key for AI content!)
251+
"header-ids", # Add IDs to headers
252+
"markdown-in-html", # Allow markdown inside HTML
253+
"breaks", # Handle line breaks better
254+
],
255+
)
256+
257+
if self.template_env:
258+
try:
259+
theme_settings = self.themes.get(theme, self.themes["default"])
260+
template = self.template_env.get_template("email_template.html")
261+
262+
return template.render(content=html_content, theme=theme_settings)
263+
except Exception as e:
264+
logger.error(f"Template rendering failed: {e}. Falling back to basic rendering.")
265+
266+
# fallback
267+
logger.info("Template environment not available. Using basic HTML rendering.")
268+
return self._basic_html_render(html_content)
269+
270+
def _fix_ai_markdown(self, content: str) -> str:
271+
"""
272+
Fix AI-generated markdown issues that markdown2 doesn't handle.
273+
Only includes fixes that are actually necessary with markdown2's cuddled-lists extra.
271274
272-
if self.template_env:
273-
try:
274-
theme_settings = self.themes.get(theme, self.themes["default"])
275-
template = self.template_env.get_template("email_template.html")
275+
Args:
276+
content: Raw markdown content
276277
277-
return template.render(content=html_content, theme=theme_settings)
278-
except Exception as e:
279-
logger.error(f"Template rendering failed: {e}. Falling back to basic rendering.")
278+
Returns:
279+
Fixed markdown content
280280
281-
# fallback
282-
logger.info("Template environment not available. Using basic HTML rendering.")
283-
return self._basic_html_render(html_content, theme)
281+
"""
282+
# Fix missing spaces after list markers, but convert section headers to proper headers
283+
lines = content.split("\n")
284+
result_lines = []
285+
286+
for line in lines:
287+
# Check if this line looks like a list item without proper spacing
288+
if re.match(r"^(\s*)(\d+\.|\*|-|\+)([^\s])", line):
289+
# Get the indentation, marker, and text
290+
match = re.match(r"^(\s*)(\d+\.|\*|-|\+)(.*)$", line)
291+
if match:
292+
indent, marker, rest_of_line = match.groups()
293+
294+
# Check if this is likely a section header vs a real list item
295+
if marker.endswith(".") and self._is_section_header(rest_of_line.strip()):
296+
# Convert to a proper markdown header
297+
header_text = rest_of_line.strip()
298+
line = f"## {header_text}"
299+
else:
300+
# This is a real list item, fix the spacing
301+
line = f"{indent}{marker} {rest_of_line.lstrip()}"
302+
303+
result_lines.append(line)
304+
305+
content = "\n".join(result_lines)
306+
307+
# Convert letter-based lists to numbers (no markdown parser handles this)
308+
return self._convert_letter_lists_to_numbers(content)
309+
310+
def _is_section_header(self, text: str) -> bool:
311+
"""
312+
Simple keyword-based check for section headers.
313+
"""
314+
text_lower = text.lower()
315+
section_keywords = [
316+
"acknowledgment",
317+
"understanding",
318+
"summary",
319+
"response",
320+
"detailed",
321+
"top 10",
322+
"posts",
323+
"trending",
324+
"with summaries",
325+
]
326+
return any(keyword in text_lower for keyword in section_keywords)
284327

285-
except ImportError:
286-
logger.error("Markdown package not available - this should never happen as it's a required dependency")
287-
raise # We should always have markdown package available
328+
def _convert_letter_lists_to_numbers(self, content: str) -> str:
329+
"""
330+
Convert letter-based list markers (a., b., c.) to numbers (1., 2., 3.)
331+
so they can be properly parsed as nested ordered lists.
332+
CSS will handle styling them back to letters.
333+
"""
334+
lines = content.split("\n")
335+
result_lines = []
336+
337+
for line in lines:
338+
# Match lines that start with letter-based list markers
339+
match = re.match(r"^(\s*)([a-z])\.\s+(.*)$", line)
340+
if match:
341+
indent, letter, text = match.groups()
342+
# Convert letter to number (a=1, b=2, c=3, etc.)
343+
number = ord(letter) - ord("a") + 1
344+
# Replace with number-based marker
345+
line = f"{indent}{number}. {text}"
346+
347+
result_lines.append(line)
348+
349+
return "\n".join(result_lines)
288350

289351
def _basic_html_render(self, html_content: str) -> str:
290352
"""

0 commit comments

Comments
 (0)