@@ -172,6 +172,8 @@ def _create_location_tokens_for_item(
172172 return ""
173173 out : list [str ] = []
174174 for prov in item .prov :
175+ if not isinstance (prov , ProvenanceItem ):
176+ continue
175177 page_w , page_h = doc .pages [prov .page_no ].size .as_tuple ()
176178 bbox = prov .bbox .to_top_left_origin (page_h ).as_tuple ()
177179 out .append (
@@ -1260,12 +1262,13 @@ def serialize(
12601262 for idp , prov_ in enumerate (item .prov ):
12611263 item_ = copy .deepcopy (item )
12621264 item_ .prov = [prov_ ]
1263- item_ .text = item .orig [
1264- prov_ .charspan [0 ] : prov_ .charspan [1 ]
1265- ] # it must be `orig`, not `text` here!
1266- item_ .orig = item .orig [prov_ .charspan [0 ] : prov_ .charspan [1 ]]
1265+ if isinstance (item , ProvenanceItem ):
1266+ item_ .text = item .orig [
1267+ prov_ .charspan [0 ] : prov_ .charspan [1 ]
1268+ ] # it must be `orig`, not `text` here!
1269+ item_ .orig = item .orig [prov_ .charspan [0 ] : prov_ .charspan [1 ]]
12671270
1268- item_ .prov [0 ].charspan = (0 , len (item_ .orig ))
1271+ item_ .prov [0 ].charspan = (0 , len (item_ .orig ))
12691272
12701273 # marker field should be cleared on subsequent split parts
12711274 if idp > 0 and isinstance (item_ , ListItem ):
@@ -1667,7 +1670,7 @@ def _emit_otsl(
16671670
16681671 page_no = 0
16691672 if need_cell_loc :
1670- if not item .prov :
1673+ if not item .prov or not isinstance ( item . prov [ 0 ], ProvenanceItem ) :
16711674 raise ValueError (
16721675 "Per-cell location requested but table has no provenance (page_no)."
16731676 )
@@ -1877,6 +1880,8 @@ def serialize(
18771880 for it , _ in doc .iterate_items (root = item ):
18781881 if isinstance (it , DocItem ) and it .prov :
18791882 for prov in it .prov :
1883+ if not isinstance (prov , ProvenanceItem ):
1884+ continue
18801885 page_w , page_h = doc .pages [prov .page_no ].size .as_tuple ()
18811886 boxes .append (prov .bbox .to_top_left_origin (page_h ).as_tuple ())
18821887 prov_page_w_h = (page_w , page_h , prov .page_no )
0 commit comments