Skip to content

Commit 246627f

Browse files
authored
fix: make doc metadata keys pure strings (#38)
Signed-off-by: Panos Vagenas <[email protected]>
1 parent b5592ad commit 246627f

File tree

1 file changed

+7
-9
lines changed

1 file changed

+7
-9
lines changed

docling_core/transforms/metadata_extractor/simple_metadata_extractor.py

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,20 +6,18 @@
66
"""Simple metadata extractor module."""
77

88

9-
from enum import Enum
109
from typing import Any
1110

1211
from docling_core.transforms.metadata_extractor import BaseMetadataExtractor
1312
from docling_core.types import Document as DLDocument
1413

14+
_DL_DOC_HASH = "dl_doc_hash"
15+
_ORIGIN = "origin"
16+
1517

1618
class SimpleMetadataExtractor(BaseMetadataExtractor):
1719
"""Simple metadata extractor class."""
1820

19-
class _Keys(str, Enum):
20-
DL_DOC_HASH = "dl_doc_hash"
21-
ORIGIN = "origin"
22-
2321
include_origin: bool = False
2422

2523
def get_metadata(
@@ -35,10 +33,10 @@ def get_metadata(
3533
dict[str, Any]: the extracted metadata
3634
"""
3735
meta: dict[str, Any] = {
38-
self._Keys.DL_DOC_HASH: doc.file_info.document_hash,
36+
_DL_DOC_HASH: doc.file_info.document_hash,
3937
}
4038
if self.include_origin:
41-
meta[self._Keys.ORIGIN] = origin
39+
meta[_ORIGIN] = origin
4240
return meta
4341

4442
def get_excluded_embed_metadata_keys(self) -> list[str]:
@@ -47,9 +45,9 @@ def get_excluded_embed_metadata_keys(self) -> list[str]:
4745
Returns:
4846
list[str]: the metadata to exclude
4947
"""
50-
excl_keys: list[str] = [self._Keys.DL_DOC_HASH]
48+
excl_keys: list[str] = [_DL_DOC_HASH]
5149
if self.include_origin:
52-
excl_keys.append(self._Keys.ORIGIN)
50+
excl_keys.append(_ORIGIN)
5351
return excl_keys
5452

5553
def get_excluded_llm_metadata_keys(self) -> list[str]:

0 commit comments

Comments (0)