@@ -1393,6 +1393,17 @@ class DoclingDocument(BaseModel):
13931393 math annotation {
13941394 display: none;
13951395 }
1396+ .formula-not-decoded {
1397+ background: repeating-linear-gradient(
1398+ 45deg, /* Angle of the stripes */
1399+ LightGray, /* First color */
1400+ LightGray 10px, /* Length of the first color */
1401+ White 10px, /* Second color */
1402+ White 20px /* Length of the second color */
1403+ );
1404+ margin: 0;
1405+ text-align: center;
1406+ }
13961407 </style>
13971408 </head>"""
13981409
@@ -2216,11 +2227,18 @@ def _append_text(text: str, do_escape_html=True, do_escape_underscores=True):
22162227
22172228 elif isinstance (item , TextItem ) and item .label in [DocItemLabel .FORMULA ]:
22182229 in_list = False
2219- _append_text (
2220- f"$${ item .text } $$\n " ,
2221- do_escape_underscores = False ,
2222- do_escape_html = False ,
2223- )
2230+ if item .text != "" :
2231+ _append_text (
2232+ f"$${ item .text } $$\n " ,
2233+ do_escape_underscores = False ,
2234+ do_escape_html = False ,
2235+ )
2236+ elif item .orig != "" :
2237+ _append_text (
2238+ "<!-- formula-not-decoded -->\n " ,
2239+ do_escape_underscores = False ,
2240+ do_escape_html = False ,
2241+ )
22242242
22252243 elif isinstance (item , TextItem ) and item .label in labels :
22262244 in_list = False
@@ -2467,9 +2485,27 @@ def _prepare_tag_content(
24672485 math_formula = _prepare_tag_content (
24682486 item .text , do_escape_html = False , do_replace_newline = False
24692487 )
2470- if formula_to_mathml :
2471- # Building a math equation in MathML format
2472- # ref https://www.w3.org/TR/wai-aria-1.1/#math
2488+ text = ""
2489+
2490+ # If the formula is not processed correctly, use its image
2491+ if (
2492+ item .text == ""
2493+ and item .orig != ""
2494+ and image_mode == ImageRefMode .EMBEDDED
2495+ and len (item .prov ) > 0
2496+ ):
2497+ item_image = item .get_image (doc = self )
2498+ if item_image is not None :
2499+ img_ref = ImageRef .from_pil (item_image , dpi = 72 )
2500+ text = (
2501+ "<figure>"
2502+ f'<img src="{ img_ref .uri } " alt="{ item .orig } " />'
2503+ "</figure>"
2504+ )
2505+
2506+ # Building a math equation in MathML format
2507+ # ref https://www.w3.org/TR/wai-aria-1.1/#math
2508+ elif formula_to_mathml :
24732509 mathml_element = latex2mathml .converter .convert_to_element (
24742510 math_formula , display = "block"
24752511 )
@@ -2480,9 +2516,15 @@ def _prepare_tag_content(
24802516 mathml = unescape (tostring (mathml_element , encoding = "unicode" ))
24812517 text = f"<div>{ mathml } </div>"
24822518
2483- else :
2519+ elif math_formula != "" :
24842520 text = f"<pre>{ math_formula } </pre>"
2485- html_texts .append (text )
2521+
2522+ if text != "" :
2523+ html_texts .append (text )
2524+ else :
2525+ html_texts .append (
2526+ '<div class="formula-not-decoded">Formula not decoded</div>'
2527+ )
24862528
24872529 elif isinstance (item , ListItem ):
24882530
0 commit comments