File tree Expand file tree Collapse file tree 7 files changed +27
-43
lines changed
docling/models/tableformer Expand file tree Collapse file tree 7 files changed +27
-43
lines changed Original file line number Diff line number Diff line change @@ -336,14 +336,6 @@ def create_dpbench_e2e_dataset(
336336 )
337337
338338
339- def create_dpbench_layout_dataset (
340- dpbench_dir : Path , output_dir : Path , image_scale : float = 1.0
341- ):
342- create_dpbench_e2e_dataset (
343- dpbench_dir = dpbench_dir , output_dir = output_dir , image_scale = image_scale
344- )
345-
346-
347339def create_dpbench_tableformer_dataset (
348340 dpbench_dir : Path , output_dir : Path , image_scale : float = 1.0
349341):
@@ -391,7 +383,7 @@ def create_dpbench_tableformer_dataset(
391383
392384 # Create the updated Document
393385 updated , pred_doc = tf_updater .replace_tabledata (
394- pdf_path = pdf_path , true_doc = true_doc # , true_page_images=true_page_images
386+ pdf_path = pdf_path , true_doc = true_doc
395387 )
396388
397389 if updated :
Original file line number Diff line number Diff line change @@ -360,16 +360,6 @@ def create_omnidocbench_e2e_dataset(
360360 )
361361
362362
363- def create_omnidocbench_layout_dataset (
364- omnidocbench_dir : Path , output_dir : Path , image_scale : float = 1.0
365- ):
366- create_omnidocbench_e2e_dataset (
367- omnidocbench_dir = omnidocbench_dir ,
368- output_dir = output_dir ,
369- image_scale = image_scale ,
370- )
371-
372-
373363def create_omnidocbench_tableformer_dataset (
374364 omnidocbench_dir : Path , output_dir : Path , image_scale : float = 1.0
375365):
@@ -427,7 +417,7 @@ def create_omnidocbench_tableformer_dataset(
427417 )
428418
429419 updated , pred_doc = tf_updater .replace_tabledata (
430- pdf_path = pdf_path , true_doc = true_doc , true_page_images = true_page_images
420+ pdf_path = pdf_path , true_doc = true_doc
431421 )
432422
433423 if updated :
Original file line number Diff line number Diff line change @@ -93,24 +93,25 @@ def add_pages_to_true_doc(
9393 page_width , page_height = page .size .width , page .size .height
9494
9595 page_image = page .get_image (scale = image_scale )
96- page_images .append (page_image )
97- page ._backend .unload ()
96+ if page_image is not None :
97+ page_images .append (page_image )
98+ image_ref = ImageRef (
99+ mimetype = "image/png" ,
100+ dpi = round (72 * image_scale ),
101+ size = Size (
102+ width = float (page_image .width ), height = float (page_image .height )
103+ ),
104+ uri = Path (f"{ BenchMarkColumns .PAGE_IMAGES } /{ page_no } " ),
105+ )
106+ page_item = PageItem (
107+ page_no = page_no + 1 ,
108+ size = Size (width = float (page_width ), height = float (page_height )),
109+ image = image_ref ,
110+ )
98111
99- image_ref = ImageRef (
100- mimetype = "image/png" ,
101- dpi = round (72 * image_scale ),
102- size = Size (
103- width = float (page_image .width ), height = float (page_image .height )
104- ),
105- uri = Path (f"{ BenchMarkColumns .PAGE_IMAGES } /{ page_no } " ),
106- )
107- page_item = PageItem (
108- page_no = page_no + 1 ,
109- size = Size (width = float (page_width ), height = float (page_height )),
110- image = image_ref ,
111- )
112+ true_doc .pages [page_no + 1 ] = page_item
112113
113- true_doc . pages [ page_no + 1 ] = page_item
114+ page . _backend . unload ()
114115
115116 return true_doc , page_images
116117
Original file line number Diff line number Diff line change 1414 create_dpbench_tableformer_dataset ,
1515)
1616from docling_eval .benchmarks .omnidocbench .create import (
17- create_omnidocbench_layout_dataset ,
17+ create_omnidocbench_e2e_dataset ,
1818 create_omnidocbench_tableformer_dataset ,
1919)
2020from docling_eval .benchmarks .tableformer_huggingface_otsl .create import (
@@ -87,7 +87,7 @@ def create(
8787 modality == EvaluationModality .END2END
8888 or modality == EvaluationModality .LAYOUT
8989 ):
90- create_omnidocbench_layout_dataset (
90+ create_omnidocbench_e2e_dataset (
9191 omnidocbench_dir = idir , output_dir = odir , image_scale = image_scale
9292 )
9393 elif modality == EvaluationModality .TABLEFORMER :
Original file line number Diff line number Diff line change @@ -113,6 +113,7 @@ def to_np(pil_image: Image.Image):
113113 else :
114114 raise ValueError ("Unsupported image format" )
115115
116+
116117# TODO: This method must be dropped.
117118def tf_predict_with_page_tokens (
118119 config ,
@@ -247,7 +248,6 @@ def replace_tabledata(
247248 self ,
248249 pdf_path : Path ,
249250 true_doc : DoclingDocument ,
250- # true_page_images: List[Image.Image],
251251 ) -> Tuple [bool , DoclingDocument ]:
252252
253253 updated = False
Original file line number Diff line number Diff line change 66
77from docling_eval .benchmarks .constants import BenchMarkNames , EvaluationModality
88from docling_eval .benchmarks .dpbench .create import (
9- create_dpbench_layout_dataset ,
9+ create_dpbench_e2e_dataset ,
1010 create_dpbench_tableformer_dataset ,
1111)
1212from docling_eval .cli .main import evaluate , visualise
@@ -44,7 +44,7 @@ def main():
4444 image_scale = 1.0
4545
4646 if True :
47- create_dpbench_layout_dataset (
47+ create_dpbench_e2e_dataset (
4848 dpbench_dir = idir , output_dir = odir_lay , image_scale = image_scale
4949 )
5050
Original file line number Diff line number Diff line change 66
77from docling_eval .benchmarks .constants import BenchMarkNames , EvaluationModality
88from docling_eval .benchmarks .omnidocbench .create import (
9- create_omnidocbench_layout_dataset ,
9+ create_omnidocbench_e2e_dataset ,
1010 create_omnidocbench_tableformer_dataset ,
1111)
1212from docling_eval .cli .main import evaluate , visualise
@@ -44,7 +44,8 @@ def main():
4444 image_scale = 1.0
4545
4646 if True :
47- create_omnidocbench_layout_dataset (
47+
48+ create_omnidocbench_e2e_dataset (
4849 omnidocbench_dir = idir , output_dir = odir_lay , image_scale = image_scale
4950 )
5051
You can’t perform that action at this time.
0 commit comments