@@ -28,9 +28,16 @@ def mock_image():
2828
@pytest.fixture()
def mock_initial_layout():
    """Provide two embedded text regions, a narrative block and a title.

    Both regions are created with ``source="Mock"``; the narrative text is the
    same sentence repeated ten times.
    """
    narrative_text = "A very repetitive narrative. " * 10
    narrative = layout.EmbeddedTextRegion(2, 4, 6, 8, text=narrative_text, source="Mock")
    title = layout.EmbeddedTextRegion(1, 2, 3, 4, text="A Catchy Title", source="Mock")
    return [narrative, title]
3643
@@ -42,11 +49,20 @@ def mock_final_layout():
4249 4 ,
4350 6 ,
4451 8 ,
52+ source = "Mock" ,
4553 text = "A very repetitive narrative. " * 10 ,
4654 type = "NarrativeText" ,
4755 )
4856
49- title_block = layoutelement .LayoutElement (1 , 2 , 3 , 4 , text = "A Catchy Title" , type = "Title" )
57+ title_block = layoutelement .LayoutElement (
58+ 1 ,
59+ 2 ,
60+ 3 ,
61+ 4 ,
62+ source = "Mock" ,
63+ text = "A Catchy Title" ,
64+ type = "Title" ,
65+ )
5066
5167 return [text_block , title_block ]
5268
@@ -709,8 +725,11 @@ def test_load_pdf_with_multicolumn_layout_and_ocr(filename="sample-docs/design-t
709725 assert element .text .startswith (test_snippets [i ])
710726
711727
712- @pytest .mark .parametrize ("colors" , ["red" , None ])
713- def test_annotate (colors ):
728+ @pytest .mark .parametrize (
729+ ("colors" , "add_details" , "threshold" ),
730+ [("red" , False , 0.992 ), (None , False , 0.992 ), ("red" , True , 0.8 )],
731+ )
732+ def test_annotate (colors , add_details , threshold ):
714733 def check_annotated_image ():
715734 annotated_array = np .array (annotated_image )
716735 for coords in [coords1 , coords2 ]:
@@ -722,9 +741,9 @@ def check_annotated_image():
722741 assert all (annotated_array [y1 :y2 , x1 , i ] == expected )
723742 assert all (annotated_array [y1 :y2 , x2 , i ] == expected )
724743 # Make sure almost all the pixels are not changed
725- assert ((annotated_array [:, :, 0 ] == 1 ).mean ()) > 0.992
726- assert ((annotated_array [:, :, 1 ] == 1 ).mean ()) > 0.992
727- assert ((annotated_array [:, :, 2 ] == 1 ).mean ()) > 0.992
744+ assert ((annotated_array [:, :, 0 ] == 1 ).mean ()) > threshold
745+ assert ((annotated_array [:, :, 1 ] == 1 ).mean ()) > threshold
746+ assert ((annotated_array [:, :, 2 ] == 1 ).mean ()) > threshold
728747
729748 test_image_arr = np .ones ((100 , 100 , 3 ), dtype = "uint8" )
730749 image = Image .fromarray (test_image_arr )
@@ -735,15 +754,18 @@ def check_annotated_image():
735754 rect2 = elements .Rectangle (* coords2 )
736755 page .elements = [rect1 , rect2 ]
737756
757+ annotated_image = page .annotate (colors = colors , add_details = add_details , sources = ["all" ])
758+ check_annotated_image ()
759+
738760 # Scenario 1: where self.image exists
739- annotated_image = page .annotate (colors = colors )
761+ annotated_image = page .annotate (colors = colors , add_details = add_details )
740762 check_annotated_image ()
741763
742764 # Scenario 2: where self.image is None, but self.image_path exists
743765 with patch .object (Image , "open" , return_value = image ):
744766 page .image = None
745767 page .image_path = "mock_path_to_image"
746- annotated_image = page .annotate (colors = colors )
768+ annotated_image = page .annotate (colors = colors , add_details = add_details )
747769 check_annotated_image ()
748770
749771
@@ -775,32 +797,30 @@ def test_image_text_region(text, ocr_strategy, expected, mock_image):
775797 )
776798
777799
778- @pytest .fixture ()
779- def ordering_layout ():
780- elements = [
781- layout .LayoutElement (x1 = 447.0 , y1 = 315.0 , x2 = 1275.7 , y2 = 413.0 , text = "0" ),
782- layout .LayoutElement (x1 = 380.6 , y1 = 473.4 , x2 = 1334.8 , y2 = 533.9 , text = "1" ),
783- layout .LayoutElement (x1 = 578.6 , y1 = 556.8 , x2 = 1109.0 , y2 = 874.4 , text = "2" ),
784- layout .LayoutElement (x1 = 444.5 , y1 = 942.3 , x2 = 1261.1 , y2 = 1584.1 , text = "3" ),
785- layout .LayoutElement (x1 = 444.8 , y1 = 1609.4 , x2 = 1257.2 , y2 = 1665.2 , text = "4" ),
786- layout .LayoutElement (x1 = 414.0 , y1 = 1718.8 , x2 = 635.0 , y2 = 1755.2 , text = "5" ),
787- layout .LayoutElement (x1 = 372.6 , y1 = 1786.9 , x2 = 1333.6 , y2 = 1848.7 , text = "6" ),
788- ]
789- return elements
class MockDetectionModel(layout.UnstructuredObjectDetectionModel):
    """Detection-model stand-in that always predicts the same seven elements."""

    def initialize(self, *args, **kwargs):
        """Accept any arguments and do nothing."""

    def predict(self, x):
        """Return seven newly built layout elements with fixed boxes, ignoring ``x``.

        Each element's text is its position in the returned list ("0" to "6").
        """
        # (x1, y1, x2, y2) for each element, in the order they are returned.
        boxes = (
            (447.0, 315.0, 1275.7, 413.0),
            (380.6, 473.4, 1334.8, 533.9),
            (578.6, 556.8, 1109.0, 874.4),
            (444.5, 942.3, 1261.1, 1584.1),
            (444.8, 1609.4, 1257.2, 1665.2),
            (414.0, 1718.8, 635.0, 1755.2),
            (372.6, 1786.9, 1333.6, 1848.7),
        )
        return [
            layout.LayoutElement(x1=x1, y1=y1, x2=x2, y2=y2, text=str(index))
            for index, (x1, y1, x2, y2) in enumerate(boxes)
        ]
790814
791815
792- def test_layout_order (mock_image , ordering_layout ):
816+ def test_layout_order (mock_image ):
793817 with tempfile .TemporaryDirectory () as tmpdir :
794818 mock_image_path = os .path .join (tmpdir , "mock.jpg" )
795819 mock_image .save (mock_image_path )
796- with patch .object (layout , "get_model" , lambda : lambda x : ordering_layout ), patch .object (
820+ with patch .object (layout , "get_model" , lambda : MockDetectionModel () ), patch .object (
797821 layout ,
798822 "load_pdf" ,
799823 lambda * args , ** kwargs : ([[]], [mock_image_path ]),
800- ), patch .object (
801- layout ,
802- "UnstructuredObjectDetectionModel" ,
803- object ,
804824 ):
805825 doc = layout .DocumentLayout .from_file ("sample-docs/layout-parser-paper.pdf" )
806826 page = doc .pages [0 ]
@@ -960,3 +980,20 @@ def test_warning_if_chipper_and_low_dpi(caplog):
960980 mock_from_file .assert_called_once ()
961981 assert caplog .records [0 ].levelname == "WARNING"
962982 assert "DPI >= 300" in caplog .records [0 ].msg
983+
984+
@pytest.mark.parametrize(
    ("filename", "img_num", "should_complete"),
    [("sample-docs/empty-document.pdf", 0, True), ("sample-docs/empty-document.pdf", 10, False)],
)
def test_get_image(filename, img_num, should_complete):
    """Check ``_get_image`` on the first page of the parametrized document.

    When ``should_complete`` is true, the returned image must be entirely white
    (mean pixel value of 255). When it is false, the call must raise
    ``ValueError``.
    """
    doc = layout.DocumentLayout.from_file(filename)
    page = doc.pages[0]

    if not should_complete:
        # pytest.raises fails the test if no ValueError is raised; a plain
        # try/except would let an unexpectedly successful call pass silently.
        with pytest.raises(ValueError):
            page._get_image(filename, img_num)
        return

    # Convert to a numpy array so the pixel values can be averaged.
    img = np.array(page._get_image(filename, img_num))
    # A blank page renders as an image with every pixel white.
    assert img.mean() == 255.0
0 commit comments