@@ -2115,10 +2115,30 @@ def export_to_markdown( # noqa: C901
21152115 # Bold, Italic, or Bold-Italic
21162116 # Hence, any underscore that we print into Markdown is coming from document text
21172117 # That means we need to escape it, to properly reflect content in the markdown
2118+ # However, we need to preserve underscores in image URLs
2119+ # to maintain their validity
2120+ # For example: `![image](path/to_image.png)` should remain unchanged
def escape_underscores(text: str) -> str:
    r"""Escape underscores in ``text`` while leaving Markdown images intact.

    Every ``_`` that is not already preceded by a backslash is rewritten
    as ``\_``. Markdown image constructs of the form ``![alt](url)`` are
    copied through verbatim (alt text and URL alike), so underscores in an
    image path are not corrupted by escaping.

    Args:
        text: Markdown text to process.

    Returns:
        The text with underscores escaped everywhere except inside
        ``![alt](url)`` image constructs.
    """
    # Non-greedy so that several images on one line are matched separately.
    image_pattern = r"!\[.*?\]\((.*?)\)"

    def _escape(fragment: str) -> str:
        # The negative lookbehind skips underscores already written as "\_".
        return re.sub(r"(?<!\\)_", r"\_", fragment)

    parts = []
    last_end = 0
    for match in re.finditer(image_pattern, text):
        # Plain text between the previous image (or start) and this one.
        parts.append(_escape(text[last_end : match.start()]))
        # The complete image construct is emitted unchanged.
        parts.append(match.group(0))
        last_end = match.end()

    # Trailing text after the last image; an empty tail escapes to "".
    parts.append(_escape(text[last_end:]))
    return "".join(parts)
21222142
21232143 mdtext = escape_underscores (mdtext )
21242144
0 commit comments