Skip to content

Commit bcace5d

Browse files
authored
perf(serialization): cache excluded references (#214)
Signed-off-by: Panos Vagenas <[email protected]>
1 parent ff13a93 commit bcace5d

File tree

1 file changed

+28
-23
lines changed
  • docling_core/experimental/serializer

1 file changed

+28
-23
lines changed

docling_core/experimental/serializer/common.py

Lines changed: 28 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -94,38 +94,43 @@ class Config:
94 94

95 95
params: CommonParams = CommonParams()
96 96

97-
# TODO add cache based on start-stop params
97+
_excluded_refs_cache: dict[str, list[str]] = {}
98+
98 99
@override
99 100
def get_excluded_refs(self, **kwargs) -> list[str]:
100 101
"""References to excluded items."""
101 102
params = self.params.merge_with_patch(patch=kwargs)
102-
refs: list[str] = [
103-
item.self_ref
104-
for ix, (item, _) in enumerate(
105-
self.doc.iterate_items(
106-
with_groups=True,
107-
traverse_pictures=True,
108-
included_content_layers=params.layers,
103+
params_json = params.model_dump_json()
104+
refs = self._excluded_refs_cache.get(params_json)
105+
if refs is None:
106+
refs = [
107+
item.self_ref
108+
for ix, (item, _) in enumerate(
109+
self.doc.iterate_items(
110+
with_groups=True,
111+
traverse_pictures=True,
112+
included_content_layers=params.layers,
113+
)
109 114
)
110-
)
111-
if (
112-
(ix < params.start_idx or ix >= params.stop_idx)
113-
or (
114-
isinstance(item, DocItem)
115-
and (
116-
item.label not in params.labels
117-
or item.content_layer not in params.layers
118-
or (
119-
params.pages is not None
120-
and (
121-
(not item.prov)
122-
or item.prov[0].page_no not in params.pages
115+
if (
116+
(ix < params.start_idx or ix >= params.stop_idx)
117+
or (
118+
isinstance(item, DocItem)
119+
and (
120+
item.label not in params.labels
121+
or item.content_layer not in params.layers
122+
or (
123+
params.pages is not None
124+
and (
125+
(not item.prov)
126+
or item.prov[0].page_no not in params.pages
127+
)
123 128
)
124 129
)
125 130
)
126 131
)
127-
)
128-
]
132+
]
133+
self._excluded_refs_cache[params_json] = refs
129 134
return refs
130 135

131 136
@abstractmethod

0 commit comments

Comments
 (0)