Skip to content

Commit c56d14a

Browse files
committed
chore(webvtt): rebase to latest changes in idoctags
Signed-off-by: Cesar Berrospi Ramis <[email protected]>
1 parent 662d472 commit c56d14a

File tree

1 file changed

+11
-6
lines changed

1 file changed

+11
-6
lines changed

docling_core/experimental/idoctags.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -172,6 +172,8 @@ def _create_location_tokens_for_item(
172172
return ""
173173
out: list[str] = []
174174
for prov in item.prov:
175+
if not isinstance(prov, ProvenanceItem):
176+
continue
175177
page_w, page_h = doc.pages[prov.page_no].size.as_tuple()
176178
bbox = prov.bbox.to_top_left_origin(page_h).as_tuple()
177179
out.append(
@@ -1260,12 +1262,13 @@ def serialize(
12601262
for idp, prov_ in enumerate(item.prov):
12611263
item_ = copy.deepcopy(item)
12621264
item_.prov = [prov_]
1263-
item_.text = item.orig[
1264-
prov_.charspan[0] : prov_.charspan[1]
1265-
] # it must be `orig`, not `text` here!
1266-
item_.orig = item.orig[prov_.charspan[0] : prov_.charspan[1]]
1265+
if isinstance(item, ProvenanceItem):
1266+
item_.text = item.orig[
1267+
prov_.charspan[0] : prov_.charspan[1]
1268+
] # it must be `orig`, not `text` here!
1269+
item_.orig = item.orig[prov_.charspan[0] : prov_.charspan[1]]
12671270

1268-
item_.prov[0].charspan = (0, len(item_.orig))
1271+
item_.prov[0].charspan = (0, len(item_.orig))
12691272

12701273
# marker field should be cleared on subsequent split parts
12711274
if idp > 0 and isinstance(item_, ListItem):
@@ -1667,7 +1670,7 @@ def _emit_otsl(
16671670

16681671
page_no = 0
16691672
if need_cell_loc:
1670-
if not item.prov:
1673+
if not item.prov or not isinstance(item.prov[0], ProvenanceItem):
16711674
raise ValueError(
16721675
"Per-cell location requested but table has no provenance (page_no)."
16731676
)
@@ -1877,6 +1880,8 @@ def serialize(
18771880
for it, _ in doc.iterate_items(root=item):
18781881
if isinstance(it, DocItem) and it.prov:
18791882
for prov in it.prov:
1883+
if not isinstance(prov, ProvenanceItem):
1884+
continue
18801885
page_w, page_h = doc.pages[prov.page_no].size.as_tuple()
18811886
boxes.append(prov.bbox.to_top_left_origin(page_h).as_tuple())
18821887
prov_page_w_h = (page_w, page_h, prov.page_no)

0 commit comments

Comments
 (0)