
Commit 2eb0b25

badGarnet and qued authored
Feat: single table structure eval metric (#2655)
Creates a composite metric to represent the table structure score: an average of the existing row and column index and content scores. This PR adds a new property to `unstructured.metrics.table_eval.TableEvaluation`: `composite_structure_acc`, which is computed from the element-level row and column index and content accuracy scores. The new metric offers a single number to represent the performance of table structure extraction models/algorithms. This PR also refactors the eval computation logic so it uses a constant, `table_eval_metrics`, instead of hard-coding the metric names in multiple places in the code.

---------

Co-authored-by: qued <[email protected]>
1 parent 1af41d5 commit 2eb0b25
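The new score is derived entirely from values the table eval already reports per element, using the formula added to `table_eval.py` below. A minimal standalone sketch of the arithmetic, with made-up example scores (the input values here are hypothetical, not real results):

# Standalone sketch of the composite computation added in table_eval.py below.
col_index_acc = 0.90    # element_col_level_index_acc
row_index_acc = 0.80    # element_row_level_index_acc
col_content_acc = 0.70  # element_col_level_content_acc
row_content_acc = 0.60  # element_row_level_content_acc

# The two content scores are averaged first, so together they carry the same
# weight as each individual index score in the final number.
composite_structure_acc = (
    col_index_acc
    + row_index_acc
    + (col_content_acc + row_content_acc) / 2
) / 3
print(round(composite_structure_acc, 4))  # 0.7833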

File tree

4 files changed (+23, -30 lines)


CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@
 ### Enhancements

 * **Add `.metadata.is_continuation` to text-split chunks.** `.metadata.is_continuation=True` is added to second-and-later chunks formed by text-splitting an oversized `Table` element but not to their counterpart `Text` element splits. Add this indicator for `CompositeElement` to allow text-split continuation chunks to be identified for downstream processes that may wish to skip intentionally redundant metadata values in continuation chunks.
+* **Add `composite_structure_acc` metric to table eval.** Add a new property to `unstructured.metrics.table_eval.TableEvaluation`: `composite_structure_acc`, which is computed from the element level row and column index and content accuracy scores.

 ### Features

test_unstructured/metrics/test_evaluate.py

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ def test_table_structure_evaluation():
     assert os.path.isfile(os.path.join(export_dir, "aggregate-table-structure-accuracy.tsv"))
     df = pd.read_csv(os.path.join(export_dir, "all-docs-table-structure-accuracy.tsv"), sep="\t")
     assert len(df) == 1
-    assert len(df.columns) == 9
+    assert len(df.columns) == 10
     assert df.iloc[0].filename == "IRS-2023-Form-1095-A.pdf"

unstructured/metrics/evaluate.py

Lines changed: 13 additions & 29 deletions
@@ -42,6 +42,15 @@
 logger.setLevel(logging.DEBUG)

 agg_headers = ["metric", "average", "sample_sd", "population_sd", "count"]
+table_eval_metrics = [
+    "total_tables",
+    "table_level_acc",
+    "composite_structure_acc",
+    "element_col_level_index_acc",
+    "element_row_level_index_acc",
+    "element_col_level_content_acc",
+    "element_row_level_content_acc",
+]


 def measure_text_extraction_accuracy(
@@ -332,50 +341,25 @@ def measure_table_structure_accuracy(
                 out_filename,
                 doctype,
                 connector,
-                report.total_tables,
-                report.table_level_acc,
-                report.element_col_level_index_acc,
-                report.element_row_level_index_acc,
-                report.element_col_level_content_acc,
-                report.element_row_level_content_acc,
             ]
+            + [getattr(report, metric) for metric in table_eval_metrics]
         )

     headers = [
         "filename",
         "doctype",
         "connector",
-        "total_tables",
-        "table_level_acc",
-        "element_col_level_index_acc",
-        "element_row_level_index_acc",
-        "element_col_level_content_acc",
-        "element_row_level_content_acc",
-    ]
+    ] + table_eval_metrics
     df = pd.DataFrame(rows, columns=headers)
     has_tables_df = df[df["total_tables"] > 0]

     if has_tables_df.empty:
         agg_df = pd.DataFrame(
-            [
-                ["total_tables", None, None, None, 0],
-                ["table_level_acc", None, None, None, 0],
-                ["element_col_level_index_acc", None, None, None, 0],
-                ["element_row_level_index_acc", None, None, None, 0],
-                ["element_col_level_content_acc", None, None, None, 0],
-                ["element_row_level_content_acc", None, None, None, 0],
-            ]
+            [[metric, None, None, None, 0] for metric in table_eval_metrics]
         ).reset_index()
     else:
         element_metrics_results = {}
-        for metric in [
-            "total_tables",
-            "table_level_acc",
-            "element_col_level_index_acc",
-            "element_row_level_index_acc",
-            "element_col_level_content_acc",
-            "element_row_level_content_acc",
-        ]:
+        for metric in table_eval_metrics:
             metric_df = has_tables_df[has_tables_df[metric].notnull()]
             agg_metric = metric_df[metric].agg([_mean, _stdev, _pstdev, _count]).transpose()
             if agg_metric.empty:
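For illustration, a simplified standalone sketch of the refactored pattern above (the `Report` class, file name, and values here are hypothetical stand-ins): one shared list of metric names drives the per-document row, the column headers, and the empty-aggregate skeleton, so a new metric such as `composite_structure_acc` only needs to be appended to `table_eval_metrics`.

import pandas as pd

# Hypothetical, trimmed-down stand-ins for this sketch only; the real code
# reads these attributes from TableEvaluation results.
table_eval_metrics = ["total_tables", "table_level_acc", "composite_structure_acc"]

class Report:
    total_tables = 2
    table_level_acc = 0.5
    composite_structure_acc = 0.75

headers = ["filename", "doctype", "connector"] + table_eval_metrics
rows = [["doc.pdf", "pdf", "local"] + [getattr(Report, m) for m in table_eval_metrics]]
df = pd.DataFrame(rows, columns=headers)
print(df)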

unstructured/metrics/table/table_eval.py

Lines changed: 8 additions & 0 deletions
@@ -47,6 +47,14 @@ class TableEvaluation:
     element_col_level_content_acc: float
     element_row_level_content_acc: float

+    @property
+    def composite_structure_acc(self) -> float:
+        return (
+            self.element_col_level_index_acc
+            + self.element_row_level_index_acc
+            + (self.element_col_level_content_acc + self.element_row_level_content_acc) / 2
+        ) / 3
+

 def table_level_acc(predicted_table_data, ground_truth_table_data, matched_indices):
     """computes for each predicted table its accuracy compared to ground truth.
