33import re
44from typing import Any , Optional
55
6+ import markdown2
67from jinja2 import Environment , FileSystemLoader , select_autoescape
78
89from mxtoai ._logging import get_logger
@@ -36,11 +37,11 @@ def __init__(self, template_dir: Optional[str] = None):
3637 # Default signature
3738 self .signature_block = """
3839
39- ---
40+ <hr style="margin: 2em 0; border: none; border-top: 1px solid #ddd;">
4041
41- ** MXtoAI Assistant**
42+ <p><strong> MXtoAI Assistant</strong></p>
4243
43- _Feel free to reply to this email to continue our conversation._
44+ <p><em>Feel free to reply to this email to continue our conversation.</em></p>
4445"""
4546
4647 def _init_template_env (self ):
@@ -226,7 +227,7 @@ def _to_plain_text(self, markdown: str) -> str:
226227
227228 def _to_html (self , markdown_content : str , theme : str = "default" ) -> str :
228229 """
229- Convert markdown to HTML using templates and themes .
230+ Convert markdown to HTML using markdown2 for robust AI-generated content handling .
230231
231232 Args:
232233 markdown_content: Markdown content
@@ -236,55 +237,116 @@ def _to_html(self, markdown_content: str, theme: str = "default") -> str:
236237 HTML version
237238
238239 """
239- try :
240- import markdown as md_converter
241- from markdown .extensions .attr_list import AttrListExtension
242- from markdown .extensions .fenced_code import FencedCodeExtension
243- from markdown .extensions .nl2br import Nl2BrExtension
244- from markdown .extensions .sane_lists import SaneListExtension
245- from markdown .extensions .tables import TableExtension
246- from markdown .extensions .toc import TocExtension
247-
248- # Pre-process to ensure lists following non-empty lines have a preceding blank line
249- markdown_content = re .sub (r'([^\n])\n(\s*(?:[-*+]|\d+\.)[ \t])' , r'\1\n\n\2' , markdown_content )
250-
251- # Configure extensions with specific settings
252- extensions = [
253- TableExtension (), # Support for tables
254- FencedCodeExtension (), # Support for fenced code blocks
255- SaneListExtension (), # Better list handling
256- Nl2BrExtension (), # Convert newlines to line breaks
257- TocExtension (permalink = False ), # Table of contents support without permalinks
258- AttrListExtension (), # Support for attributes
259- ]
260-
261- # Convert markdown to HTML with configured extensions
262- html_content = md_converter .markdown (
263- markdown_content ,
264- extensions = extensions ,
265- extension_configs = {
266- # Explicitly disable footnotes if it's a default or separate extension
267- # 'markdown.extensions.footnotes': {'PLACE_MARKER': '!!!!FOOTNOTES!!!!'}
268- },
269- output_format = "html5" , # Use html5 for better compatibility
270- )
240+ # Pre-process markdown to fix issues not handled by markdown2
241+ markdown_content = self ._fix_ai_markdown (markdown_content )
242+
243+ # Convert markdown to HTML with markdown2 (robust for AI content)
244+ html_content = markdown2 .markdown (
245+ markdown_content ,
246+ extras = [
247+ "fenced-code-blocks" , # Support for ```code``` blocks
248+ "tables" , # Support for tables
249+ "strike" , # Support for ~~strikethrough~~
250+ "cuddled-lists" , # Better list handling (key for AI content!)
251+ "header-ids" , # Add IDs to headers
252+ "markdown-in-html" , # Allow markdown inside HTML
253+ "breaks" , # Handle line breaks better
254+ ],
255+ )
256+
257+ if self .template_env :
258+ try :
259+ theme_settings = self .themes .get (theme , self .themes ["default" ])
260+ template = self .template_env .get_template ("email_template.html" )
261+
262+ return template .render (content = html_content , theme = theme_settings )
263+ except Exception as e :
264+ logger .error (f"Template rendering failed: { e } . Falling back to basic rendering." )
265+
266+ # fallback
267+ logger .info ("Template environment not available. Using basic HTML rendering." )
268+ return self ._basic_html_render (html_content )
269+
def _fix_ai_markdown(self, content: str) -> str:
    """
    Repair AI-generated markdown glitches that markdown2 does not handle itself.

    Three fixes are applied:

    * A list marker glued to its text (``1.Item``, ``-item``) gets the
      missing space inserted so the line parses as a list item.
    * A numbered line whose text looks like a section title (as judged by
      ``_is_section_header``) is rewritten as a level-2 markdown header.
    * Letter-based list markers are converted to numbers by
      ``_convert_letter_lists_to_numbers``.

    Lines that only *look* like a cuddled list marker are left untouched:
    emphasis (``**bold**``, ``*italic*``), horizontal rules (``---``,
    ``***``) and decimal numbers (``3.14``). Lines inside fenced code blocks
    are skipped by the first two fixes; the final letter-to-number pass still
    runs over the whole document.

    Args:
        content: Raw markdown content

    Returns:
        Fixed markdown content

    """
    # An ordered ("1.") or bullet ("*", "-", "+") marker immediately followed
    # by a non-space character, i.e. a list item missing its separator space.
    cuddled_marker = re.compile(r"^(\s*)(\d+\.|[*+-])(\S.*)$")

    result_lines = []
    in_code_fence = False

    for line in content.split("\n"):
        # Fence delimiters toggle the state and are never rewritten themselves.
        if line.lstrip().startswith("```"):
            in_code_fence = not in_code_fence
            result_lines.append(line)
            continue

        match = None if in_code_fence else cuddled_marker.match(line)
        if match:
            indent, marker, rest_of_line = match.groups()
            first_char = rest_of_line[0]
            is_bullet = marker in ("*", "-", "+")

            not_a_list_item = (
                # "**bold", "---", "***", "++": a repeated marker character is
                # strong emphasis or a horizontal rule, not a bullet.
                (is_bullet and first_char == marker)
                # "*italic* ...": an odd number of remaining asterisks means
                # the leading one is paired, so it opens emphasis.
                or (marker == "*" and rest_of_line.count("*") % 2 == 1)
                # "3.14": a digit right after the dot is a decimal number.
                or (not is_bullet and first_char.isdigit())
            )

            if not_a_list_item:
                pass
            elif not is_bullet and self._is_section_header(rest_of_line.strip()):
                # Numbered section titles become proper markdown headers.
                line = f"## {rest_of_line.strip()}"
            else:
                # A real list item: insert the missing space after the marker.
                line = f"{indent}{marker} {rest_of_line}"

        result_lines.append(line)

    content = "\n".join(result_lines)

    # Convert letter-based lists to numbers (no markdown parser handles this)
    return self._convert_letter_lists_to_numbers(content)
309+
310+ def _is_section_header (self , text : str ) -> bool :
311+ """
312+ Simple keyword-based check for section headers.
313+ """
314+ text_lower = text .lower ()
315+ section_keywords = [
316+ "acknowledgment" ,
317+ "understanding" ,
318+ "summary" ,
319+ "response" ,
320+ "detailed" ,
321+ "top 10" ,
322+ "posts" ,
323+ "trending" ,
324+ "with summaries" ,
325+ ]
326+ return any (keyword in text_lower for keyword in section_keywords )
284327
285- except ImportError :
286- logger .error ("Markdown package not available - this should never happen as it's a required dependency" )
287- raise # We should always have markdown package available
328+ def _convert_letter_lists_to_numbers (self , content : str ) -> str :
329+ """
330+ Convert letter-based list markers (a., b., c.) to numbers (1., 2., 3.)
331+ so they can be properly parsed as nested ordered lists.
332+ CSS will handle styling them back to letters.
333+ """
334+ lines = content .split ("\n " )
335+ result_lines = []
336+
337+ for line in lines :
338+ # Match lines that start with letter-based list markers
339+ match = re .match (r"^(\s*)([a-z])\.\s+(.*)$" , line )
340+ if match :
341+ indent , letter , text = match .groups ()
342+ # Convert letter to number (a=1, b=2, c=3, etc.)
343+ number = ord (letter ) - ord ("a" ) + 1
344+ # Replace with number-based marker
345+ line = f"{ indent } { number } . { text } "
346+
347+ result_lines .append (line )
348+
349+ return "\n " .join (result_lines )
288350
289351 def _basic_html_render (self , html_content : str ) -> str :
290352 """
0 commit comments