Skip to content

Commit 963a398

Browse files
committed
Added checks for existence of PDF file list and individual files, plus check for parse success for individual PDFs.
1 parent fba8dc6 commit 963a398

File tree

1 file changed

+17
-1
lines changed

1 file changed

+17
-1
lines changed

src/parserindexer/parser.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,15 @@ def parse_file(self, path):
4040
:param path: path to file
4141
:return: parsed content
4242
"""
43-
parsed = tkparser.from_file(path)
43+
if not os.path.exists(path):
44+
print('Error: Could not find PDF file %s.' % path)
45+
sys.exit(1)
46+
47+
try:
48+
parsed = tkparser.from_file(path)
49+
except:
50+
print('Error: Could not parse PDF file %s.' % path)
51+
sys.exit(1)
4452
parsed['file'] = os.path.abspath(path)
4553
return parsed
4654

@@ -64,8 +72,16 @@ def main(parser_class, args):
6472
parser = parser_class(**args)
6573
# get stream/list of files
6674
if args['list']:
75+
if not os.path.exists(args['list']):
76+
print('Error: Could not find file containing input paths %s.' %
77+
args['list'])
78+
sys.exit(1)
6779
files = read_lines(args['list'])
6880
else:
81+
if not os.path.exists(args['in']):
82+
print('Error: Could not find input PDF file %s.' %
83+
args['in'])
84+
sys.exit(1)
6985
files = [args['in']]
7086
# Step : Parse
7187
parsed = parser.parse_files(files)

0 commit comments

Comments
 (0)