@@ -138,17 +138,19 @@ class ObjectType(Enum):
138
138
added_objects = set () # set of object types todo mypy
139
139
for idx , mask_char in enumerate (mask_chars ):
140
140
object_type , object_idx = mask_char
141
- if object_idx is None :
142
- raise ValueError ("object_idx should not be None" )
143
141
if object_type == ObjectType .NONE :
144
142
page_text += form_recognizer_results .content [page_offset + idx ]
145
143
elif object_type == ObjectType .TABLE :
144
+ if object_idx is None :
145
+ raise ValueError ("Expected object_idx to be set" )
146
146
if mask_char not in added_objects :
147
147
page_text += DocumentAnalysisParser .table_to_html (tables_on_page [object_idx ])
148
148
added_objects .add (mask_char )
149
149
elif object_type == ObjectType .FIGURE :
150
150
if cu_describer is None :
151
151
raise ValueError ("cu_describer should not be None, unable to describe figure" )
152
+ if object_idx is None :
153
+ raise ValueError ("Expected object_idx to be set" )
152
154
if mask_char not in added_objects :
153
155
figure_html = await DocumentAnalysisParser .figure_to_html (
154
156
doc_for_pymupdf , cu_describer , figures_on_page [object_idx ]
@@ -176,7 +178,7 @@ async def figure_to_html(
176
178
doc : pymupdf .Document , cu_describer : ContentUnderstandingDescriber , figure : DocumentFigure
177
179
) -> str :
178
180
figure_title = (figure .caption and figure .caption .content ) or ""
179
- logger .info ("Describing figure '%s' with title" , figure .id , figure_title )
181
+ logger .info ("Describing figure %s with title '%s' " , figure .id , figure_title )
180
182
if not figure .bounding_regions :
181
183
return f"<figure><figcaption>{ figure_title } </figcaption></figure>"
182
184
for region in figure .bounding_regions :
0 commit comments