Skip to content

Commit 7142f83

Browse files
committed
Merge pull request #44 from acdha/pylint-warnings
Linter cleanup
2 parents 7521804 + 27eeda8 commit 7142f83

File tree

1 file changed

+74
-53
lines changed

1 file changed

+74
-53
lines changed

bagit.py

Lines changed: 74 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,19 @@
3131
% bagit.py --help
3232
"""
3333

34-
import os
35-
import re
36-
import sys
3734
import codecs
38-
import signal
3935
import hashlib
4036
import logging
37+
import multiprocessing
4138
import optparse
39+
import os
40+
import re
41+
import signal
42+
import sys
4243
import tempfile
43-
import multiprocessing
44-
45-
from os import listdir
4644
from datetime import date
47-
from os.path import isdir, isfile, join, abspath
45+
from os import listdir
46+
from os.path import abspath, isdir, isfile, join
4847

4948
logger = logging.getLogger(__name__)
5049

@@ -121,7 +120,7 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
121120
logger.info("moving %s to %s" % (temp_data, 'data'))
122121
os.rename(temp_data, 'data')
123122

124-
# permissions for the payload directory should match those of the
123+
# permissions for the payload directory should match those of the
125124
# original directory
126125
os.chmod('data', os.stat(cwd).st_mode)
127126

@@ -232,7 +231,7 @@ def compare_manifests_with_fs(self):
232231
files_in_manifest = files_in_manifest | set(self.missing_optional_tagfiles())
233232

234233
return (list(files_in_manifest - files_on_fs),
235-
list(files_on_fs - files_in_manifest))
234+
list(files_on_fs - files_in_manifest))
236235

237236
def compare_fetch_with_fs(self):
238237
"""Compares the fetch entries with the files actually
@@ -258,20 +257,20 @@ def payload_files(self):
258257

259258
def payload_entries(self):
260259
# Don't use dict comprehension (compatibility with Python < 2.7)
261-
return dict((key, value) for (key, value) in self.entries.items() \
262-
if key.startswith("data" + os.sep))
260+
return dict((key, value) for (key, value) in self.entries.items()
261+
if key.startswith("data" + os.sep))
263262

264263
def save(self, processes=1, manifests=False):
265264
"""
266-
save will persist any changes that have been made to the bag
267-
metadata (self.info).
268-
269-
If you have modified the payload of the bag (added, modified,
265+
save will persist any changes that have been made to the bag
266+
metadata (self.info).
267+
268+
If you have modified the payload of the bag (added, modified,
270269
removed files in the data directory) and want to regenerate manifests
271-
set the manifests parameter to True. The default is False since you
272-
wouldn't want a save to accidentally create a new manifest for
273-
a corrupted bag.
274-
270+
set the manifests parameter to True. The default is False since you
271+
wouldn't want a save to accidentally create a new manifest for
272+
a corrupted bag.
273+
275274
If you want to control the number of processes that are used when
276275
recalculating checksums use the processes parameter.
277276
"""
@@ -320,8 +319,8 @@ def save(self, processes=1, manifests=False):
320319
os.chdir(old_dir)
321320

322321
def tagfile_entries(self):
323-
return dict((key, value) for (key, value) in self.entries.items() \
324-
if not key.startswith("data" + os.sep))
322+
return dict((key, value) for (key, value) in self.entries.items()
323+
if not key.startswith("data" + os.sep))
325324

326325
def missing_optional_tagfiles(self):
327326
"""
@@ -346,16 +345,16 @@ def fetch_entries(self):
346345

347346
def files_to_be_fetched(self):
348347
for f, size, path in self.fetch_entries():
349-
yield f
348+
yield f
350349

351350
def has_oxum(self):
352351
return 'Payload-Oxum' in self.info
353352

354353
def validate(self, processes=1, fast=False):
355-
"""Checks the structure and contents are valid. If you supply
356-
the parameter fast=True the Payload-Oxum (if present) will
357-
be used to check that the payload files are present and
358-
accounted for, instead of re-calculating fixities and
354+
"""Checks the structure and contents are valid. If you supply
355+
the parameter fast=True the Payload-Oxum (if present) will
356+
be used to check that the payload files are present and
357+
accounted for, instead of re-calculating fixities and
359358
comparing them against the manifest. By default validate()
360359
will re-calculate fixities (fast=False).
361360
"""
@@ -370,7 +369,7 @@ def is_valid(self, fast=False):
370369
"""
371370
try:
372371
self.validate(fast=fast)
373-
except BagError as e:
372+
except BagError:
374373
return False
375374
return True
376375

@@ -394,7 +393,8 @@ def _load_manifests(self):
394393
line = line.strip()
395394

396395
# Ignore blank lines and comments.
397-
if line == "" or line.startswith("#"): continue
396+
if line == "" or line.startswith("#"):
397+
continue
398398

399399
entry = line.split(None, 1)
400400

@@ -440,15 +440,17 @@ def _validate_contents(self, processes=1, fast=False):
440440
raise BagValidationError("cannot validate Bag with fast=True if Bag lacks a Payload-Oxum")
441441
self._validate_oxum() # Fast
442442
if not fast:
443-
self._validate_entries(processes) # *SLOW*
443+
self._validate_entries(processes) # *SLOW*
444444

445445
def _validate_oxum(self):
446446
oxum = self.info.get('Payload-Oxum')
447-
if oxum == None: return
447+
448+
if oxum is None:
449+
return
448450

449451
# If multiple Payload-Oxum tags (bad idea)
450452
# use the first listed in bag-info.txt
451-
if type(oxum) is list:
453+
if isinstance(oxum, list):
452454
oxum = oxum[0]
453455

454456
byte_count, file_count = oxum.split('.', 1)
@@ -551,33 +553,43 @@ def _validate_bagittxt(self):
551553
class BagError(Exception):
552554
pass
553555

556+
554557
class BagValidationError(BagError):
555-
def __init__(self, message, details=[]):
558+
def __init__(self, message, details=None):
559+
if details is None:
560+
details = []
561+
556562
self.message = message
557563
self.details = details
564+
558565
def __str__(self):
559566
if len(self.details) > 0:
560567
details = " ; ".join([str(e) for e in self.details])
561568
return "%s: %s" % (self.message, details)
562569
return self.message
563570

571+
564572
class ManifestErrorDetail(BagError):
565573
def __init__(self, path):
566574
self.path = path
567575

576+
568577
class ChecksumMismatch(ManifestErrorDetail):
569578
def __init__(self, path, algorithm=None, expected=None, found=None):
570579
self.path = path
571580
self.algorithm = algorithm
572581
self.expected = expected
573582
self.found = found
583+
574584
def __str__(self):
575585
return "%s checksum validation failed (alg=%s expected=%s found=%s)" % (self.path, self.algorithm, self.expected, self.found)
576586

587+
577588
class FileMissing(ManifestErrorDetail):
578589
def __str__(self):
579590
return "%s exists in manifest but not found on filesystem" % self.path
580591

592+
581593
class UnexpectedFile(ManifestErrorDetail):
582594
def __str__(self):
583595
return "%s exists on filesystem but is not in manifest" % self.path
@@ -635,7 +647,7 @@ def _load_tag_file(tag_file_name):
635647
# in order of parsing under the same key.
636648
tags = {}
637649
for name, value in _parse_tags(tag_file):
638-
if not name in tags:
650+
if name not in tags:
639651
tags[name] = value
640652
continue
641653

@@ -657,7 +669,7 @@ def _parse_tags(file):
657669
tag_name = None
658670
tag_value = None
659671

660-
# Line folding is handled by yielding values only after we encounter
672+
# Line folding is handled by yielding values only after we encounter
661673
# the start of a new tag, or if we pass the EOF.
662674
for num, line in enumerate(file):
663675
# If byte-order mark ignore it for now.
@@ -667,14 +679,14 @@ def _parse_tags(file):
667679
# Skip over any empty or blank lines.
668680
if len(line) == 0 or line.isspace():
669681
continue
670-
elif line[0].isspace() and tag_value != None : # folded line
682+
elif line[0].isspace() and tag_value is not None: # folded line
671683
tag_value += line
672684
else:
673685
# Starting a new tag; yield the last one.
674686
if tag_name:
675687
yield (tag_name, tag_value.strip())
676688

677-
if not ':' in line:
689+
if ':' not in line:
678690
raise BagValidationError("invalid line '%s' in %s" % (line.strip(), os.path.basename(file.name)))
679691

680692
parts = line.strip().split(':', 1)
@@ -697,7 +709,7 @@ def _make_tag_file(bag_info_path, bag_info):
697709
else:
698710
txt = bag_info[h]
699711
# strip CR, LF and CRLF so they don't mess up the tag file
700-
txt = re.sub('\n|\r|(\r\n)', '', txt)
712+
txt = re.sub('\n|\r|(\r\n)', '', txt)
701713
f.write("%s: %s\n" % (h, txt))
702714

703715

@@ -716,10 +728,6 @@ def _make_manifest(manifest_file, data_dir, processes, algorithm='md5'):
716728
raise RuntimeError("unknown algorithm %s" % algorithm)
717729

718730
if processes > 1:
719-
# avoid using multiprocessing unless it is required since
720-
# multiprocessing doesn't work in some environments (mod_wsgi, etc)
721-
722-
import multiprocessing
723731
pool = multiprocessing.Pool(processes=processes)
724732
checksums = pool.map(manifest_line, _walk(data_dir))
725733
pool.close()
@@ -763,8 +771,8 @@ def _make_tagmanifest_file(alg, bag_dir):
763771

764772
def _walk(data_dir):
765773
for dirpath, dirnames, filenames in os.walk(data_dir):
766-
# if we don't sort here the order of entries is non-deterministic
767-
# which makes it hard to test the fixity of tagmanifest-md5.txt
774+
# if we don't sort here the order of entries is non-deterministic
775+
# which makes it hard to test the fixity of tagmanifest-md5.txt
768776
filenames.sort()
769777
dirnames.sort()
770778
for fn in filenames:
@@ -775,6 +783,7 @@ def _walk(data_dir):
775783
path = '/'.join(parts)
776784
yield path
777785

786+
778787
def _can_bag(test_dir):
779788
"""returns (unwriteable files/folders)
780789
"""
@@ -784,6 +793,7 @@ def _can_bag(test_dir):
784793
unwriteable.append(os.path.join(os.path.abspath(test_dir), inode))
785794
return tuple(unwriteable)
786795

796+
787797
def _can_read(test_dir):
788798
"""
789799
returns ((unreadable_dirs), (unreadable_files))
@@ -799,18 +809,23 @@ def _can_read(test_dir):
799809
unreadable_files.append(os.path.join(dirpath, fn))
800810
return (tuple(unreadable_dirs), tuple(unreadable_files))
801811

812+
802813
def _manifest_line_md5(filename):
803814
return _manifest_line(filename, 'md5')
804815

816+
805817
def _manifest_line_sha1(filename):
806818
return _manifest_line(filename, 'sha1')
807819

820+
808821
def _manifest_line_sha256(filename):
809822
return _manifest_line(filename, 'sha256')
810823

824+
811825
def _manifest_line_sha512(filename):
812826
return _manifest_line(filename, 'sha512')
813827

828+
814829
def _hasher(algorithm='md5'):
815830
if algorithm == 'md5':
816831
m = hashlib.md5()
@@ -822,6 +837,7 @@ def _hasher(algorithm='md5'):
822837
m = hashlib.sha512()
823838
return m
824839

840+
825841
def _manifest_line(filename, algorithm='md5'):
826842
with open(filename, 'rb') as fh:
827843
m = _hasher(algorithm)
@@ -836,11 +852,13 @@ def _manifest_line(filename, algorithm='md5'):
836852

837853
return (m.hexdigest(), _decode_filename(filename), total_bytes)
838854

855+
839856
def _encode_filename(s):
840857
s = s.replace("\r", "%0D")
841858
s = s.replace("\n", "%0A")
842859
return s
843860

861+
844862
def _decode_filename(s):
845863
s = re.sub("%0D", "\r", s, re.IGNORECASE)
846864
s = re.sub("%0A", "\n", s, re.IGNORECASE)
@@ -854,11 +872,13 @@ def __init__(self, *args, **opts):
854872
self.bag_info = {}
855873
optparse.OptionParser.__init__(self, *args, **opts)
856874

875+
857876
def _bag_info_store(option, opt, value, parser):
858877
opt = opt.lstrip('--')
859878
opt_caps = '-'.join([o.capitalize() for o in opt.split('-')])
860879
parser.bag_info[opt_caps] = value
861880

881+
862882
def _make_opt_parser():
863883
parser = BagOptionParser(usage='usage: %prog [options] dir1 dir2 ...')
864884
parser.add_option('--processes', action='store', type="int",
@@ -871,22 +891,23 @@ def _make_opt_parser():
871891

872892
# optionally specify which checksum algorithm(s) to use when creating a bag
873893
# NOTE: could generate from checksum_algos ?
874-
parser.add_option('--md5', action='append_const', dest='checksum',
875-
const='md5', help='Generate MD5 manifest when creating a bag (default)')
876-
parser.add_option('--sha1', action='append_const', dest='checksum',
877-
const='sha1', help='Generate SHA1 manifest when creating a bag')
878-
parser.add_option('--sha256', action='append_const', dest='checksum',
879-
const='sha256', help='Generate SHA-256 manifest when creating a bag')
880-
parser.add_option('--sha512', action='append_const', dest='checksum',
881-
const='sha512', help='Generate SHA-512 manifest when creating a bag')
894+
parser.add_option('--md5', action='append_const', dest='checksum', const='md5',
895+
help='Generate MD5 manifest when creating a bag (default)')
896+
parser.add_option('--sha1', action='append_const', dest='checksum', const='sha1',
897+
help='Generate SHA1 manifest when creating a bag')
898+
parser.add_option('--sha256', action='append_const', dest='checksum', const='sha256',
899+
help='Generate SHA-256 manifest when creating a bag')
900+
parser.add_option('--sha512', action='append_const', dest='checksum', const='sha512',
901+
help='Generate SHA-512 manifest when creating a bag')
882902

883903
for header in _bag_info_headers:
884904
parser.add_option('--%s' % header.lower(), type="string",
885905
action='callback', callback=_bag_info_store)
886906
return parser
887907

908+
888909
def _configure_logging(opts):
889-
log_format="%(asctime)s - %(levelname)s - %(message)s"
910+
log_format = "%(asctime)s - %(levelname)s - %(message)s"
890911
if opts.quiet:
891912
level = logging.ERROR
892913
else:

0 commit comments

Comments
 (0)