3131 % bagit.py --help
3232"""
3333
34- import os
35- import re
36- import sys
3734import codecs
38- import signal
3935import hashlib
4036import logging
37+ import multiprocessing
4138import optparse
39+ import os
40+ import re
41+ import signal
42+ import sys
4243import tempfile
43- import multiprocessing
44-
45- from os import listdir
4644from datetime import date
47- from os .path import isdir , isfile , join , abspath
45+ from os import listdir
46+ from os .path import abspath , isdir , isfile , join
4847
4948logger = logging .getLogger (__name__ )
5049
@@ -121,7 +120,7 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
121120 logger .info ("moving %s to %s" % (temp_data , 'data' ))
122121 os .rename (temp_data , 'data' )
123122
124- # permissions for the payload directory should match those of the
123+ # permissions for the payload directory should match those of the
125124 # original directory
126125 os .chmod ('data' , os .stat (cwd ).st_mode )
127126
@@ -232,7 +231,7 @@ def compare_manifests_with_fs(self):
232231 files_in_manifest = files_in_manifest | set (self .missing_optional_tagfiles ())
233232
234233 return (list (files_in_manifest - files_on_fs ),
235- list (files_on_fs - files_in_manifest ))
234+ list (files_on_fs - files_in_manifest ))
236235
237236 def compare_fetch_with_fs (self ):
238237 """Compares the fetch entries with the files actually
@@ -258,20 +257,20 @@ def payload_files(self):
258257
259258 def payload_entries (self ):
260259 # Don't use dict comprehension (compatibility with Python < 2.7)
261- return dict ((key , value ) for (key , value ) in self .entries .items () \
262- if key .startswith ("data" + os .sep ))
260+ return dict ((key , value ) for (key , value ) in self .entries .items ()
261+ if key .startswith ("data" + os .sep ))
263262
264263 def save (self , processes = 1 , manifests = False ):
265264 """
266- save will persist any changes that have been made to the bag
267- metadata (self.info).
268-
269- If you have modified the payload of the bag (added, modified,
265+ save will persist any changes that have been made to the bag
266+ metadata (self.info).
267+
268+ If you have modified the payload of the bag (added, modified,
270269 removed files in the data directory) and want to regenerate manifests
271- set the manifests parameter to True. The default is False since you
272- wouldn't want a save to accidentally create a new manifest for
273- a corrupted bag.
274-
270+ set the manifests parameter to True. The default is False since you
271+ wouldn't want a save to accidentally create a new manifest for
272+ a corrupted bag.
273+
275274 If you want to control the number of processes that are used when
276275 recalculating checksums use the processes parameter.
277276 """
@@ -320,8 +319,8 @@ def save(self, processes=1, manifests=False):
320319 os .chdir (old_dir )
321320
322321 def tagfile_entries (self ):
323- return dict ((key , value ) for (key , value ) in self .entries .items () \
324- if not key .startswith ("data" + os .sep ))
322+ return dict ((key , value ) for (key , value ) in self .entries .items ()
323+ if not key .startswith ("data" + os .sep ))
325324
326325 def missing_optional_tagfiles (self ):
327326 """
@@ -346,16 +345,16 @@ def fetch_entries(self):
346345
347346 def files_to_be_fetched (self ):
348347 for f , size , path in self .fetch_entries ():
349- yield f
348+ yield f
350349
351350 def has_oxum (self ):
352351 return 'Payload-Oxum' in self .info
353352
354353 def validate (self , processes = 1 , fast = False ):
355- """Checks the structure and contents are valid. If you supply
356- the parameter fast=True the Payload-Oxum (if present) will
357- be used to check that the payload files are present and
358- accounted for, instead of re-calculating fixities and
354+ """Checks the structure and contents are valid. If you supply
355+ the parameter fast=True the Payload-Oxum (if present) will
356+ be used to check that the payload files are present and
357+ accounted for, instead of re-calculating fixities and
359358 comparing them against the manifest. By default validate()
360359 will re-calculate fixities (fast=False).
361360 """
@@ -370,7 +369,7 @@ def is_valid(self, fast=False):
370369 """
371370 try :
372371 self .validate (fast = fast )
373- except BagError as e :
372+ except BagError :
374373 return False
375374 return True
376375
@@ -394,7 +393,8 @@ def _load_manifests(self):
394393 line = line .strip ()
395394
396395 # Ignore blank lines and comments.
397- if line == "" or line .startswith ("#" ): continue
396+ if line == "" or line .startswith ("#" ):
397+ continue
398398
399399 entry = line .split (None , 1 )
400400
@@ -440,15 +440,17 @@ def _validate_contents(self, processes=1, fast=False):
440440 raise BagValidationError ("cannot validate Bag with fast=True if Bag lacks a Payload-Oxum" )
441441 self ._validate_oxum () # Fast
442442 if not fast :
443- self ._validate_entries (processes ) # *SLOW*
443+ self ._validate_entries (processes ) # *SLOW*
444444
445445 def _validate_oxum (self ):
446446 oxum = self .info .get ('Payload-Oxum' )
447- if oxum == None : return
447+
448+ if oxum is None :
449+ return
448450
449451 # If multiple Payload-Oxum tags (bad idea)
450452 # use the first listed in bag-info.txt
451- if type (oxum ) is list :
453+ if isinstance (oxum , list ) :
452454 oxum = oxum [0 ]
453455
454456 byte_count , file_count = oxum .split ('.' , 1 )
@@ -551,33 +553,43 @@ def _validate_bagittxt(self):
551553class BagError (Exception ):
552554 pass
553555
556+
554557class BagValidationError (BagError ):
555- def __init__ (self , message , details = []):
558+ def __init__ (self , message , details = None ):
559+ if details is None :
560+ details = []
561+
556562 self .message = message
557563 self .details = details
564+
558565 def __str__ (self ):
559566 if len (self .details ) > 0 :
560567 details = " ; " .join ([str (e ) for e in self .details ])
561568 return "%s: %s" % (self .message , details )
562569 return self .message
563570
571+
564572class ManifestErrorDetail (BagError ):
565573 def __init__ (self , path ):
566574 self .path = path
567575
576+
568577class ChecksumMismatch (ManifestErrorDetail ):
569578 def __init__ (self , path , algorithm = None , expected = None , found = None ):
570579 self .path = path
571580 self .algorithm = algorithm
572581 self .expected = expected
573582 self .found = found
583+
574584 def __str__ (self ):
575585 return "%s checksum validation failed (alg=%s expected=%s found=%s)" % (self .path , self .algorithm , self .expected , self .found )
576586
587+
577588class FileMissing (ManifestErrorDetail ):
578589 def __str__ (self ):
579590 return "%s exists in manifest but not found on filesystem" % self .path
580591
592+
581593class UnexpectedFile (ManifestErrorDetail ):
582594 def __str__ (self ):
583595 return "%s exists on filesystem but is not in manifest" % self .path
@@ -635,7 +647,7 @@ def _load_tag_file(tag_file_name):
635647 # in order of parsing under the same key.
636648 tags = {}
637649 for name , value in _parse_tags (tag_file ):
638- if not name in tags :
650+ if name not in tags :
639651 tags [name ] = value
640652 continue
641653
@@ -657,7 +669,7 @@ def _parse_tags(file):
657669 tag_name = None
658670 tag_value = None
659671
660- # Line folding is handled by yielding values only after we encounter
672+ # Line folding is handled by yielding values only after we encounter
661673 # the start of a new tag, or if we pass the EOF.
662674 for num , line in enumerate (file ):
663675 # If byte-order mark ignore it for now.
@@ -667,14 +679,14 @@ def _parse_tags(file):
667679 # Skip over any empty or blank lines.
668680 if len (line ) == 0 or line .isspace ():
669681 continue
670- elif line [0 ].isspace () and tag_value != None : # folded line
682+ elif line [0 ].isspace () and tag_value is not None : # folded line
671683 tag_value += line
672684 else :
673685 # Starting a new tag; yield the last one.
674686 if tag_name :
675687 yield (tag_name , tag_value .strip ())
676688
677- if not ':' in line :
689+ if ':' not in line :
678690 raise BagValidationError ("invalid line '%s' in %s" % (line .strip (), os .path .basename (file .name )))
679691
680692 parts = line .strip ().split (':' , 1 )
@@ -697,7 +709,7 @@ def _make_tag_file(bag_info_path, bag_info):
697709 else :
698710 txt = bag_info [h ]
699711 # strip CR, LF and CRLF so they don't mess up the tag file
700- txt = re .sub ('\n |\r |(\r \n )' , '' , txt )
712+ txt = re .sub ('\n |\r |(\r \n )' , '' , txt )
701713 f .write ("%s: %s\n " % (h , txt ))
702714
703715
@@ -716,10 +728,6 @@ def _make_manifest(manifest_file, data_dir, processes, algorithm='md5'):
716728 raise RuntimeError ("unknown algorithm %s" % algorithm )
717729
718730 if processes > 1 :
719- # avoid using multiprocessing unless it is required since
720- # multiprocessing doesn't work in some environments (mod_wsgi, etc)
721-
722- import multiprocessing
723731 pool = multiprocessing .Pool (processes = processes )
724732 checksums = pool .map (manifest_line , _walk (data_dir ))
725733 pool .close ()
@@ -763,8 +771,8 @@ def _make_tagmanifest_file(alg, bag_dir):
763771
764772def _walk (data_dir ):
765773 for dirpath , dirnames , filenames in os .walk (data_dir ):
766- # if we don't sort here the order of entries is non-deterministic
767- # which makes it hard to test the fixity of tagmanifest-md5.txt
774+ # if we don't sort here the order of entries is non-deterministic
775+ # which makes it hard to test the fixity of tagmanifest-md5.txt
768776 filenames .sort ()
769777 dirnames .sort ()
770778 for fn in filenames :
@@ -775,6 +783,7 @@ def _walk(data_dir):
775783 path = '/' .join (parts )
776784 yield path
777785
786+
778787def _can_bag (test_dir ):
779788 """returns (unwriteable files/folders)
780789 """
@@ -784,6 +793,7 @@ def _can_bag(test_dir):
784793 unwriteable .append (os .path .join (os .path .abspath (test_dir ), inode ))
785794 return tuple (unwriteable )
786795
796+
787797def _can_read (test_dir ):
788798 """
789799 returns ((unreadable_dirs), (unreadable_files))
@@ -799,18 +809,23 @@ def _can_read(test_dir):
799809 unreadable_files .append (os .path .join (dirpath , fn ))
800810 return (tuple (unreadable_dirs ), tuple (unreadable_files ))
801811
812+
802813def _manifest_line_md5 (filename ):
803814 return _manifest_line (filename , 'md5' )
804815
816+
805817def _manifest_line_sha1 (filename ):
806818 return _manifest_line (filename , 'sha1' )
807819
820+
808821def _manifest_line_sha256 (filename ):
809822 return _manifest_line (filename , 'sha256' )
810823
824+
811825def _manifest_line_sha512 (filename ):
812826 return _manifest_line (filename , 'sha512' )
813827
828+
814829def _hasher (algorithm = 'md5' ):
815830 if algorithm == 'md5' :
816831 m = hashlib .md5 ()
@@ -822,6 +837,7 @@ def _hasher(algorithm='md5'):
822837 m = hashlib .sha512 ()
823838 return m
824839
840+
825841def _manifest_line (filename , algorithm = 'md5' ):
826842 with open (filename , 'rb' ) as fh :
827843 m = _hasher (algorithm )
@@ -836,11 +852,13 @@ def _manifest_line(filename, algorithm='md5'):
836852
837853 return (m .hexdigest (), _decode_filename (filename ), total_bytes )
838854
855+
839856def _encode_filename (s ):
840857 s = s .replace ("\r " , "%0D" )
841858 s = s .replace ("\n " , "%0A" )
842859 return s
843860
861+
844862def _decode_filename (s ):
845863 s = re .sub ("%0D" , "\r " , s , re .IGNORECASE )
846864 s = re .sub ("%0A" , "\n " , s , re .IGNORECASE )
@@ -854,11 +872,13 @@ def __init__(self, *args, **opts):
854872 self .bag_info = {}
855873 optparse .OptionParser .__init__ (self , * args , ** opts )
856874
875+
857876def _bag_info_store (option , opt , value , parser ):
858877 opt = opt .lstrip ('--' )
859878 opt_caps = '-' .join ([o .capitalize () for o in opt .split ('-' )])
860879 parser .bag_info [opt_caps ] = value
861880
881+
862882def _make_opt_parser ():
863883 parser = BagOptionParser (usage = 'usage: %prog [options] dir1 dir2 ...' )
864884 parser .add_option ('--processes' , action = 'store' , type = "int" ,
@@ -871,22 +891,23 @@ def _make_opt_parser():
871891
872892 # optionally specify which checksum algorithm(s) to use when creating a bag
873893 # NOTE: could generate from checksum_algos ?
874- parser .add_option ('--md5' , action = 'append_const' , dest = 'checksum' ,
875- const = 'md5' , help = 'Generate MD5 manifest when creating a bag (default)' )
876- parser .add_option ('--sha1' , action = 'append_const' , dest = 'checksum' ,
877- const = 'sha1' , help = 'Generate SHA1 manifest when creating a bag' )
878- parser .add_option ('--sha256' , action = 'append_const' , dest = 'checksum' ,
879- const = 'sha256' , help = 'Generate SHA-256 manifest when creating a bag' )
880- parser .add_option ('--sha512' , action = 'append_const' , dest = 'checksum' ,
881- const = 'sha512' , help = 'Generate SHA-512 manifest when creating a bag' )
894+ parser .add_option ('--md5' , action = 'append_const' , dest = 'checksum' , const = 'md5' ,
895+ help = 'Generate MD5 manifest when creating a bag (default)' )
896+ parser .add_option ('--sha1' , action = 'append_const' , dest = 'checksum' , const = 'sha1' ,
897+ help = 'Generate SHA1 manifest when creating a bag' )
898+ parser .add_option ('--sha256' , action = 'append_const' , dest = 'checksum' , const = 'sha256' ,
899+ help = 'Generate SHA-256 manifest when creating a bag' )
900+ parser .add_option ('--sha512' , action = 'append_const' , dest = 'checksum' , const = 'sha512' ,
901+ help = 'Generate SHA-512 manifest when creating a bag' )
882902
883903 for header in _bag_info_headers :
884904 parser .add_option ('--%s' % header .lower (), type = "string" ,
885905 action = 'callback' , callback = _bag_info_store )
886906 return parser
887907
908+
888909def _configure_logging (opts ):
889- log_format = "%(asctime)s - %(levelname)s - %(message)s"
910+ log_format = "%(asctime)s - %(levelname)s - %(message)s"
890911 if opts .quiet :
891912 level = logging .ERROR
892913 else :
0 commit comments