@@ -95,10 +95,10 @@ def parse_file_type(blocks):
9595 return FileType .UNKNOWN
9696
9797
98- def parse_investor_info (page_dict ) -> InvestorInfo :
98+ def parse_investor_info (page_dict , page_rect : fitz . Rect ) -> InvestorInfo :
9999 """Parse investor info."""
100- width = max (page_dict [ " width" ] , 600 )
101- height = max (page_dict [ " height" ] , 800 )
100+ width = max (page_rect . width , 600 )
101+ height = max (page_rect . height , 800 )
102102
103103 blocks = sorted (
104104 [x for x in page_dict ["blocks" ] if x ["bbox" ][1 ] < height / 2 ], key = lambda x : x ["bbox" ][1 ]
@@ -190,7 +190,7 @@ def cas_pdf_to_text(filename: Union[str, io.IOBase], password) -> PartialCASData
190190
191191 with fp :
192192 try :
193- doc = fitz .open (stream = fp .read (), filetype = "pdf" )
193+ doc = fitz .Document (stream = fp .read (), filetype = "pdf" )
194194 except Exception as e :
195195 raise CASParseError ("Unhandled error while opening file :: %s" % (str (e )))
196196
@@ -210,7 +210,7 @@ def cas_pdf_to_text(filename: Union[str, io.IOBase], password) -> PartialCASData
210210 file_type = parse_file_type (blocks )
211211 sorted_blocks = sorted (blocks , key = itemgetter (1 , 0 ))
212212 if investor_info is None :
213- investor_info = parse_investor_info (page_dict )
213+ investor_info = parse_investor_info (page_dict , page . rect )
214214 pages .append (sorted_blocks )
215215 lines = group_similar_rows (pages )
216216 return PartialCASData (file_type = file_type , investor_info = investor_info , lines = lines )
0 commit comments