|
1 | 1 | """Define classes for Doctags serialization.""" |
2 | 2 |
|
3 | | -import html |
4 | 3 | from enum import Enum |
5 | | -from pathlib import Path |
6 | 4 | from typing import Dict, List, Optional, Union |
7 | 5 |
|
8 | | -from pydantic import AnyUrl, BaseModel |
| 6 | +from pydantic import BaseModel |
9 | 7 | from typing_extensions import override |
10 | 8 |
|
11 | 9 | from docling_core.experimental.serializer.base import ( |
|
25 | 23 | CodeItem, |
26 | 24 | DocItem, |
27 | 25 | DoclingDocument, |
28 | | - Formatting, |
29 | 26 | FormItem, |
30 | 27 | InlineGroup, |
31 | 28 | KeyValueItem, |
@@ -112,7 +109,6 @@ def serialize( |
112 | 109 | text_part = item.text |
113 | 110 | text_part = doc_serializer.post_process( |
114 | 111 | text=text_part, |
115 | | - escape_html=False, # TODO review |
116 | 112 | formatting=item.formatting, |
117 | 113 | hyperlink=item.hyperlink, |
118 | 114 | ) |
@@ -456,26 +452,6 @@ class DocTagsDocSerializer(DocSerializer): |
456 | 452 |
|
457 | 453 | params: DocTagsParams = DocTagsParams() |
458 | 454 |
|
459 | | - def post_process( |
460 | | - self, |
461 | | - text: str, |
462 | | - *, |
463 | | - escape_html: bool = True, |
464 | | - formatting: Optional[Formatting] = None, |
465 | | - hyperlink: Optional[Union[AnyUrl, Path]] = None, |
466 | | - **kwargs, |
467 | | - ) -> str: |
468 | | - """Apply some text post-processing steps.""" |
469 | | - res = text |
470 | | - if escape_html: |
471 | | - res = html.escape(res, quote=False) |
472 | | - res = super().post_process( |
473 | | - text=res, |
474 | | - formatting=formatting, |
475 | | - hyperlink=hyperlink, |
476 | | - ) |
477 | | - return res |
478 | | - |
479 | 455 | @override |
480 | 456 | def serialize_page(self, parts: list[SerializationResult]) -> SerializationResult: |
481 | 457 | """Serialize a page out of its parts.""" |
|
0 commit comments