@@ -53,6 +53,13 @@ def parse_args():
5353 help = "Enable interactive mode (default: False)" ,
5454 )
5555
56+ # Add an optional boolean argument for printing the text of each cell
57+ parser .add_argument (
58+ "--display-text" ,
59+ action = "store_true" ,
60+ help = "Enable interactive mode (default: False)" ,
61+ )
62+
5663 # Add an argument for the output directory, defaulting to "./tmp"
5764 parser .add_argument (
5865 "-o" ,
@@ -91,11 +98,17 @@ def parse_args():
9198 args .interactive ,
9299 args .output_dir ,
93100 int (args .page ),
101+ args .display_text ,
94102 )
95103
96104
97105def visualise_v1 (
98- log_level : str , pdf_path : str , interactive : str , output_dir : str , page_num : int
106+ log_level : str ,
107+ pdf_path : str ,
108+ interactive : str ,
109+ output_dir : str ,
110+ page_num : int ,
111+ display_text : bool ,
99112):
100113
101114 parser = pdf_parser_v1 ()
@@ -200,7 +213,12 @@ def visualise_v1(
200213
201214
202215def visualise_v2 (
203- log_level : str , pdf_path : str , interactive : str , output_dir : str , page_num : int
216+ log_level : str ,
217+ pdf_path : str ,
218+ interactive : str ,
219+ output_dir : str ,
220+ page_num : int ,
221+ display_text : bool ,
204222):
205223
206224 parser = pdf_parser_v2 (log_level )
@@ -214,10 +232,17 @@ def visualise_v2(
214232
215233 doc = None
216234
217- if page_num == - 1 :
218- doc = parser .parse_pdf_from_key (doc_key )
219- else :
220- doc = parser .parse_pdf_from_key_on_page (doc_key , page_num )
235+ try :
236+ if page_num == - 1 :
237+ doc = parser .parse_pdf_from_key (doc_key )
238+ else :
239+ doc = parser .parse_pdf_from_key_on_page (doc_key , page_num )
240+ except Exception as exc :
241+ print (f"Could not parse pdf-document: { exc } " )
242+ doc = None
243+
244+ if doc == None :
245+ return
221246
222247 parser .unload_document (doc_key )
223248
@@ -295,6 +320,9 @@ def visualise_v2(
295320 (x [3 ], H - y [3 ]),
296321 ]
297322
323+ if display_text :
324+ print (row [cells_header .index ("text" )])
325+
298326 if "glyph" in row [cells_header .index ("text" )]:
299327 print (f" skip cell -> { row } " )
300328 continue
@@ -328,12 +356,12 @@ def visualise_v2(
328356
329357def main ():
330358
331- log_level , version , pdf , interactive , output_dir , page = parse_args ()
359+ log_level , version , pdf , interactive , output_dir , page , display_text = parse_args ()
332360
333361 if version == "v1" :
334- visualise_v1 (log_level , pdf , interactive , output_dir , page )
362+ visualise_v1 (log_level , pdf , interactive , output_dir , page , display_text )
335363 elif version == "v2" :
336- visualise_v2 (log_level , pdf , interactive , output_dir , page )
364+ visualise_v2 (log_level , pdf , interactive , output_dir , page , display_text )
337365 else :
338366 return - 1
339367
0 commit comments