|
57 | 57 | NodeItem, |
58 | 58 | OrderedList, |
59 | 59 | PictureItem, |
| 60 | + PictureTabularChartData, |
60 | 61 | SectionHeaderItem, |
61 | 62 | TableCell, |
62 | 63 | TableItem, |
@@ -104,6 +105,9 @@ class HTMLParams(CommonParams): |
104 | 105 | # Allow for different output styles |
105 | 106 | output_style: HTMLOutputStyle = HTMLOutputStyle.SINGLE_COLUMN |
106 | 107 |
|
| 108 | + # Enable charts to be printed into HTML as tables |
| 109 | + enable_chart_tables: bool = True |
| 110 | + |
107 | 111 |
|
108 | 112 | class HTMLTextSerializer(BaseModel, BaseTextSerializer): |
109 | 113 | """HTML-specific text item serializer.""" |
@@ -402,9 +406,28 @@ def serialize( |
402 | 406 | and item.image.uri.scheme == "data" |
403 | 407 | ): |
404 | 408 | img_text = f'<img src="{quote(str(item.image.uri))}">' |
| 409 | + |
405 | 410 | if img_text: |
406 | 411 | res_parts.append(create_ser_result(text=img_text, span_source=item)) |
407 | 412 |
|
| 413 | + if params.enable_chart_tables: |
| 414 | + # Check whether the picture carries a PictureTabularChartData annotation |
| 415 | + tabular_chart_annotations = [ |
| 416 | + ann |
| 417 | + for ann in item.annotations |
| 418 | + if isinstance(ann, PictureTabularChartData) |
| 419 | + ] |
| 420 | + if len(tabular_chart_annotations) > 0: |
| 421 | + temp_doc = DoclingDocument(name="temp") |
| 422 | + temp_table = temp_doc.add_table( |
| 423 | + data=tabular_chart_annotations[0].chart_data |
| 424 | + ) |
| 425 | + html_table_content = temp_table.export_to_html(temp_doc) |
| 426 | + if len(html_table_content) > 0: |
| 427 | + res_parts.append( |
| 428 | + create_ser_result(text=html_table_content, span_source=item) |
| 429 | + ) |
| 430 | + |
408 | 431 | text_res = "".join([r.text for r in res_parts]) |
409 | 432 | if text_res: |
410 | 433 | text_res = f"<figure>{text_res}</figure>" |
|
0 commit comments