Skip to content

Commit b3caa64

Browse files
Merge remote-tracking branch 'origin/main'
merge
2 parents 5243532 + 05001a8 commit b3caa64

File tree

3 files changed

+14
-4
lines changed

3 files changed

+14
-4
lines changed

kensho_kenverters/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## v1.2.8
4+
5+
* Fixing an edge case when building a table from only figure-extracted table annotations or only table annotations
6+
* Making build_content_grid_from_figure_extracted_table_cell_annotations public
7+
38
## v1.2.7
49

510
* Fixing PyPI version mismatch - no code changes

kensho_kenverters/output_to_tables.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,12 @@ def get_table_uid_to_annotations_mapping(
102102
table_to_annotations = {}
103103
for table_uid, cells in table_uid_to_cells.items():
104104
cell_uids = [cell.uid for cell in cells]
105-
table_to_annotations[table_uid] = [uid_to_annotation[uid] for uid in cell_uids]
105+
# The caller may pass in only table structure annotations or only figure
106+
# table structure annotations. In that case, some cell uids have no entry in
107+
# uid_to_annotation, so we skip those uids and keep only the ones that do.
108+
table_to_annotations[table_uid] = [
109+
uid_to_annotation[uid] for uid in cell_uids if uid in uid_to_annotation
110+
]
106111
return table_to_annotations
107112

108113

@@ -139,7 +144,7 @@ def build_uids_grid_from_table_cell_annotations(
139144
return rows
140145

141146

142-
def _build_content_grid_from_figure_extracted_table_cell_annotations(
147+
def build_content_grid_from_figure_extracted_table_cell_annotations(
143148
annotations: Sequence[AnnotationModel],
144149
) -> list[list[str]]:
145150
"""Build content grid where each location has a string of content."""
@@ -263,7 +268,7 @@ def build_table_grids(
263268
tables[table_uid] = (table_uid_to_type_mapping[table_uid], content_grid)
264269
else:
265270
content_grid = (
266-
_build_content_grid_from_figure_extracted_table_cell_annotations(
271+
build_content_grid_from_figure_extracted_table_cell_annotations(
267272
cell_annotations
268273
)
269274
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "kensho_kenverters"
3-
version = "1.2.7"
3+
version = "1.2.8"
44
description = "Extract Output Translator Tools"
55
readme = "README.md"
66
authors = ["Valerie Faucon-Morin <valerie.fauconmorin@kensho.com>"]

0 commit comments

Comments
 (0)