Skip to content

Commit 7521804

Browse files
committed
Merge pull request #43 from Hwesta/use-logger
Use logger and with statements, don't use sys.exit
2 parents ac70f0f + 8258044 commit 7521804

File tree

1 file changed

+66
-87
lines changed

1 file changed

+66
-87
lines changed

bagit.py

Lines changed: 66 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,13 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
8181
the bag_info dictionary.
8282
"""
8383
bag_dir = os.path.abspath(bag_dir)
84-
logging.info("creating bag for directory %s" % bag_dir)
84+
logger.info("creating bag for directory %s" % bag_dir)
8585
# assume md5 checksum if not specified
8686
if not checksum:
8787
checksum = ['md5']
8888

8989
if not os.path.isdir(bag_dir):
90-
logging.error("no such bag directory %s" % bag_dir)
90+
logger.error("no such bag directory %s" % bag_dir)
9191
raise RuntimeError("no such bag directory %s" % bag_dir)
9292

9393
old_dir = os.path.abspath(os.path.curdir)
@@ -96,17 +96,17 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
9696
try:
9797
unbaggable = _can_bag(os.curdir)
9898
if unbaggable:
99-
logging.error("no write permissions for the following directories and files: \n%s", unbaggable)
100-
sys.exit("\nNot all files/folders can be moved.")
99+
logger.error("no write permissions for the following directories and files: \n%s", unbaggable)
100+
raise BagError("Not all files/folders can be moved.")
101101
unreadable_dirs, unreadable_files = _can_read(os.curdir)
102102
if unreadable_dirs or unreadable_files:
103103
if unreadable_dirs:
104-
logging.error("The following directories do not have read permissions: \n%s", unreadable_dirs)
104+
logger.error("The following directories do not have read permissions: \n%s", unreadable_dirs)
105105
if unreadable_files:
106-
logging.error("The following files do not have read permissions: \n%s", unreadable_files)
107-
sys.exit("\nRead permissions are required to calculate file fixities.")
106+
logger.error("The following files do not have read permissions: \n%s", unreadable_files)
107+
raise BagError("Read permissions are required to calculate file fixities.")
108108
else:
109-
logging.info("creating data dir")
109+
logger.info("creating data dir")
110110

111111
cwd = os.getcwd()
112112
temp_data = tempfile.mkdtemp(dir=cwd)
@@ -115,26 +115,26 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
115115
if os.path.abspath(f) == temp_data:
116116
continue
117117
new_f = os.path.join(temp_data, f)
118-
logging.info("moving %s to %s" % (f, new_f))
118+
logger.info("moving %s to %s" % (f, new_f))
119119
os.rename(f, new_f)
120120

121-
logging.info("moving %s to %s" % (temp_data, 'data'))
121+
logger.info("moving %s to %s" % (temp_data, 'data'))
122122
os.rename(temp_data, 'data')
123123

124124
# permissions for the payload directory should match those of the
125125
# original directory
126126
os.chmod('data', os.stat(cwd).st_mode)
127127

128128
for c in checksum:
129-
logging.info("writing manifest-%s.txt" % c)
129+
logger.info("writing manifest-%s.txt" % c)
130130
Oxum = _make_manifest('manifest-%s.txt' % c, 'data', processes, c)
131131

132-
logging.info("writing bagit.txt")
132+
logger.info("writing bagit.txt")
133133
txt = """BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8\n"""
134134
with open("bagit.txt", "w") as bagit_file:
135135
bagit_file.write(txt)
136136

137-
logging.info("writing bag-info.txt")
137+
logger.info("writing bag-info.txt")
138138
if bag_info is None:
139139
bag_info = {}
140140

@@ -148,14 +148,12 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
148148

149149
for c in checksum:
150150
_make_tagmanifest_file(c, bag_dir)
151-
152-
153-
except Exception as e:
151+
except Exception:
152+
logger.exception("An error occurred creating the bag")
153+
raise
154+
finally:
154155
os.chdir(old_dir)
155-
logging.exception(e)
156-
raise e
157156

158-
os.chdir(old_dir)
159157
return Bag(bag_dir)
160158

161159

@@ -289,24 +287,24 @@ def save(self, processes=1, manifests=False):
289287
if manifests:
290288
unbaggable = _can_bag(self.path)
291289
if unbaggable:
292-
logging.error("no write permissions for the following directories and files: \n%s", unbaggable)
290+
logger.error("no write permissions for the following directories and files: \n%s", unbaggable)
293291
raise BagError("Not all files/folders can be moved.")
294292
unreadable_dirs, unreadable_files = _can_read(self.path)
295293
if unreadable_dirs or unreadable_files:
296294
if unreadable_dirs:
297-
logging.error("The following directories do not have read permissions: \n%s", unreadable_dirs)
295+
logger.error("The following directories do not have read permissions: \n%s", unreadable_dirs)
298296
if unreadable_files:
299-
logging.error("The following files do not have read permissions: \n%s", unreadable_files)
297+
logger.error("The following files do not have read permissions: \n%s", unreadable_files)
300298
raise BagError("Read permissions are required to calculate file fixities.")
301299

302300
oxum = None
303301
self.algs = list(set(self.algs)) # Dedupe
304302
for alg in self.algs:
305-
logging.info('updating manifest-%s.txt', alg)
303+
logger.info('updating manifest-%s.txt', alg)
306304
oxum = _make_manifest('manifest-%s.txt' % alg, 'data', processes, alg)
307305

308306
# Update Payload-Oxum
309-
logging.info('updating %s', self.tag_file_name)
307+
logger.info('updating %s', self.tag_file_name)
310308
if oxum:
311309
self.info['Payload-Oxum'] = oxum
312310

@@ -341,17 +339,10 @@ def fetch_entries(self):
341339
fetch_file_path = os.path.join(self.path, "fetch.txt")
342340

343341
if isfile(fetch_file_path):
344-
fetch_file = open(fetch_file_path, 'rb')
345-
346-
try:
342+
with open(fetch_file_path, 'rb') as fetch_file:
347343
for line in fetch_file:
348344
parts = line.strip().split(None, 2)
349345
yield (parts[0], parts[1], parts[2])
350-
except Exception as e:
351-
fetch_file.close()
352-
raise e
353-
354-
fetch_file.close()
355346

356347
def files_to_be_fetched(self):
357348
for f, size, path in self.fetch_entries():
@@ -409,7 +400,7 @@ def _load_manifests(self):
409400

410401
# Format is FILENAME *CHECKSUM
411402
if len(entry) != 2:
412-
logging.error("%s: Invalid %s manifest entry: %s", self, alg, line)
403+
logger.error("%s: Invalid %s manifest entry: %s", self, alg, line)
413404
continue
414405

415406
entry_hash = entry[0]
@@ -489,11 +480,11 @@ def _validate_entries(self, processes):
489480
only_in_manifests, only_on_fs = self.compare_manifests_with_fs()
490481
for path in only_in_manifests:
491482
e = FileMissing(path)
492-
logging.warning(str(e))
483+
logger.warning(str(e))
493484
errors.append(e)
494485
for path in only_on_fs:
495486
e = UnexpectedFile(path)
496-
logging.warning(str(e))
487+
logger.warning(str(e))
497488
errors.append(e)
498489

499490
# To avoid the overhead of reading the file more than once or loading
@@ -507,7 +498,7 @@ def _validate_entries(self, processes):
507498
hashlib.new(alg)
508499
available_hashers.add(alg)
509500
except ValueError:
510-
logging.warning("Unable to validate file contents using unknown %s hash algorithm", alg)
501+
logger.warning("Unable to validate file contents using unknown %s hash algorithm", alg)
511502

512503
if not available_hashers:
513504
raise RuntimeError("%s: Unable to validate bag contents: none of the hash algorithms in %s are supported!" % (self, self.algs))
@@ -532,15 +523,15 @@ def _init_worker():
532523
pass
533524
# Any unhandled exceptions are probably fatal
534525
except:
535-
logging.exception("unable to calculate file hashes for %s", self)
526+
logger.exception("unable to calculate file hashes for %s", self)
536527
raise
537528

538529
for rel_path, f_hashes, hashes in hash_results:
539530
for alg, computed_hash in list(f_hashes.items()):
540531
stored_hash = hashes[alg]
541532
if stored_hash.lower() != computed_hash:
542533
e = ChecksumMismatch(rel_path, alg, stored_hash.lower(), computed_hash)
543-
logging.warning(str(e))
534+
logger.warning(str(e))
544535
errors.append(e)
545536

546537
if errors:
@@ -551,13 +542,10 @@ def _validate_bagittxt(self):
551542
Verify that bagit.txt conforms to specification
552543
"""
553544
bagit_file_path = os.path.join(self.path, "bagit.txt")
554-
bagit_file = open(bagit_file_path, 'r')
555-
try:
545+
with open(bagit_file_path, 'r') as bagit_file:
556546
first_line = bagit_file.readline()
557547
if first_line.startswith(BOM):
558548
raise BagValidationError("bagit.txt must not contain a byte-order mark")
559-
finally:
560-
bagit_file.close()
561549

562550

563551
class BagError(Exception):
@@ -573,7 +561,7 @@ def __str__(self):
573561
return "%s: %s" % (self.message, details)
574562
return self.message
575563

576-
class ManifestErrorDetail():
564+
class ManifestErrorDetail(BagError):
577565
def __init__(self, path):
578566
self.path = path
579567

@@ -624,33 +612,25 @@ def _calculate_file_hashes(full_path, f_hashers):
624612
raise BagValidationError("%s does not exist" % full_path)
625613

626614
try:
627-
try:
628-
f = open(full_path, 'rb')
615+
with open(full_path, 'rb') as f:
629616
while True:
630617
block = f.read(1048576)
631618
if not block:
632619
break
633620
for i in list(f_hashers.values()):
634621
i.update(block)
635-
except IOError as e:
636-
raise BagValidationError("could not read %s: %s" % (full_path, str(e)))
637-
except OSError as e:
638-
raise BagValidationError("could not read %s: %s" % (full_path, str(e)))
639-
finally:
640-
try:
641-
f.close()
642-
except:
643-
pass
622+
except IOError as e:
623+
raise BagValidationError("could not read %s: %s" % (full_path, str(e)))
624+
except OSError as e:
625+
raise BagValidationError("could not read %s: %s" % (full_path, str(e)))
644626

645627
return dict(
646628
(alg, h.hexdigest()) for alg, h in list(f_hashers.items())
647629
)
648630

649631

650632
def _load_tag_file(tag_file_name):
651-
tag_file = open(tag_file_name, 'r')
652-
653-
try:
633+
with open(tag_file_name, 'r') as tag_file:
654634
# Store duplicate tags as list of vals
655635
# in order of parsing under the same key.
656636
tags = {}
@@ -665,9 +645,6 @@ def _load_tag_file(tag_file_name):
665645
tags[name].append(value)
666646
return tags
667647

668-
finally:
669-
tag_file.close()
670-
671648
def _parse_tags(file):
672649
"""Parses a tag file, according to RFC 2822. This
673650
includes line folding, permitting extra-long
@@ -725,7 +702,7 @@ def _make_tag_file(bag_info_path, bag_info):
725702

726703

727704
def _make_manifest(manifest_file, data_dir, processes, algorithm='md5'):
728-
logging.info('writing manifest with %s processes' % processes)
705+
logger.info('writing manifest with %s processes' % processes)
729706

730707
if algorithm == 'md5':
731708
manifest_line = _manifest_line_md5
@@ -764,21 +741,20 @@ def _make_manifest(manifest_file, data_dir, processes, algorithm='md5'):
764741

765742
def _make_tagmanifest_file(alg, bag_dir):
766743
tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg)
767-
logging.info("writing %s", tagmanifest_file)
744+
logger.info("writing %s", tagmanifest_file)
768745
files = [f for f in listdir(bag_dir) if isfile(join(bag_dir, f))]
769746
checksums = []
770747
for f in files:
771748
if re.match('^tagmanifest-.+\.txt$', f):
772749
continue
773-
fh = open(join(bag_dir, f), 'rb')
774-
m = _hasher(alg)
775-
while True:
776-
bytes = fh.read(16384)
777-
if not bytes:
778-
break
779-
m.update(bytes)
780-
checksums.append((m.hexdigest(), f))
781-
fh.close()
750+
with open(join(bag_dir, f), 'rb') as fh:
751+
m = _hasher(alg)
752+
while True:
753+
bytes = fh.read(16384)
754+
if not bytes:
755+
break
756+
m.update(bytes)
757+
checksums.append((m.hexdigest(), f))
782758

783759
with open(join(bag_dir, tagmanifest_file), 'w') as tagmanifest:
784760
for digest, filename in checksums:
@@ -847,16 +823,16 @@ def _hasher(algorithm='md5'):
847823
return m
848824

849825
def _manifest_line(filename, algorithm='md5'):
850-
fh = open(filename, 'rb')
851-
m = _hasher(algorithm)
826+
with open(filename, 'rb') as fh:
827+
m = _hasher(algorithm)
852828

853-
total_bytes = 0
854-
while True:
855-
bytes = fh.read(16384)
856-
total_bytes += len(bytes)
857-
if not bytes: break
858-
m.update(bytes)
859-
fh.close()
829+
total_bytes = 0
830+
while True:
831+
bytes = fh.read(16384)
832+
total_bytes += len(bytes)
833+
if not bytes:
834+
break
835+
m.update(bytes)
860836

861837
return (m.hexdigest(), _decode_filename(filename), total_bytes)
862838

@@ -928,7 +904,6 @@ def _configure_logging(opts):
928904
opt_parser.error("number of processes needs to be 0 or more")
929905

930906
_configure_logging(opts)
931-
log = logging.getLogger()
932907

933908
rc = 0
934909
for bag_dir in args:
@@ -940,17 +915,21 @@ def _configure_logging(opts):
940915
# validate throws a BagError or BagValidationError
941916
valid = bag.validate(processes=opts.processes, fast=opts.fast)
942917
if opts.fast:
943-
log.info("%s valid according to Payload-Oxum", bag_dir)
918+
logger.info("%s valid according to Payload-Oxum", bag_dir)
944919
else:
945-
log.info("%s is valid", bag_dir)
920+
logger.info("%s is valid", bag_dir)
946921
except BagError as e:
947-
log.info("%s is invalid: %s", bag_dir, e)
922+
logger.info("%s is invalid: %s", bag_dir, e)
948923
rc = 1
949924

950925
# make the bag
951926
else:
952-
make_bag(bag_dir, bag_info=opt_parser.bag_info,
953-
processes=opts.processes,
954-
checksum=opts.checksum)
927+
try:
928+
make_bag(bag_dir, bag_info=opt_parser.bag_info,
929+
processes=opts.processes,
930+
checksum=opts.checksum)
931+
except Exception:
932+
logger.info("%s failed to create: %s", bag_dir, e)
933+
rc = 1
955934

956935
sys.exit(rc)

0 commit comments

Comments
 (0)