6969
7070checksum_algos = ['md5' , 'sha1' , 'sha256' , 'sha512' ]
7171
72+ BOM = codecs .BOM_UTF8
73+ if sys .version_info [0 ] >= 3 :
74+ BOM = BOM .decode ('utf-8' )
75+
7276
7377def make_bag (bag_dir , bag_info = None , processes = 1 , checksum = None ):
7478 """
@@ -127,7 +131,8 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
127131
128132 logging .info ("writing bagit.txt" )
129133 txt = """BagIt-Version: 0.97\n Tag-File-Character-Encoding: UTF-8\n """
130- open ("bagit.txt" , "wb" ).write (txt )
134+ with open ("bagit.txt" , "w" ) as bagit_file :
135+ bagit_file .write (txt )
131136
132137 logging .info ("writing bag-info.txt" )
133138 if bag_info is None :
@@ -143,7 +148,7 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
143148
144149 _make_tagmanifest_file ('tagmanifest-md5.txt' , bag_dir )
145150
146- except Exception , e :
151+ except Exception as e :
147152 os .chdir (old_dir )
148153 logging .exception (e )
149154 raise e
@@ -188,7 +193,7 @@ def _open(self):
188193 try :
189194 self .version = tags ["BagIt-Version" ]
190195 self .encoding = tags ["Tag-File-Character-Encoding" ]
191- except KeyError , e :
196+ except KeyError as e :
192197 raise BagError ("Missing required tag in bagit.txt: %s" % e )
193198
194199 if self .version in ["0.93" , "0.94" , "0.95" ]:
@@ -253,7 +258,7 @@ def payload_files(self):
253258
254259 def payload_entries (self ):
255260 # Don't use dict comprehension (compatibility with Python < 2.7)
256- return dict ((key , value ) for (key , value ) in self .entries .iteritems () \
261+ return dict ((key , value ) for (key , value ) in self .entries .items () \
257262 if key .startswith ("data" + os .sep ))
258263
259264 def save (self , processes = 1 , manifests = False ):
@@ -314,7 +319,7 @@ def save(self, processes=1, manifests=False):
314319 os .chdir (old_dir )
315320
316321 def tagfile_entries (self ):
317- return dict ((key , value ) for (key , value ) in self .entries .iteritems () \
322+ return dict ((key , value ) for (key , value ) in self .entries .items () \
318323 if not key .startswith ("data" + os .sep ))
319324
320325 def missing_optional_tagfiles (self ):
@@ -325,7 +330,7 @@ def missing_optional_tagfiles(self):
325330 only check for entries with missing files (not missing
326331 entries for existing files).
327332 """
328- for tagfilepath in self .tagfile_entries ().keys ():
333+ for tagfilepath in list ( self .tagfile_entries ().keys () ):
329334 if not os .path .isfile (os .path .join (self .path , tagfilepath )):
330335 yield tagfilepath
331336
@@ -339,7 +344,7 @@ def fetch_entries(self):
339344 for line in fetch_file :
340345 parts = line .strip ().split (None , 2 )
341346 yield (parts [0 ], parts [1 ], parts [2 ])
342- except Exception , e :
347+ except Exception as e :
343348 fetch_file .close ()
344349 raise e
345350
@@ -350,7 +355,7 @@ def files_to_be_fetched(self):
350355 yield f
351356
352357 def has_oxum (self ):
353- return self . info . has_key ( 'Payload-Oxum' )
358+ return 'Payload-Oxum' in self . info
354359
355360 def validate (self , processes = 1 , fast = False ):
356361 """Checks the structure and contents are valid. If you supply
@@ -371,7 +376,7 @@ def is_valid(self, fast=False):
371376 """
372377 try :
373378 self .validate (fast = fast )
374- except BagError , e :
379+ except BagError as e :
375380 return False
376381 return True
377382
@@ -390,9 +395,7 @@ def _load_manifests(self):
390395 alg = os .path .basename (manifest_file ).replace (search , "" ).replace (".txt" , "" )
391396 self .algs .append (alg )
392397
393- manifest_file = open (manifest_file , 'rb' )
394-
395- try :
398+ with open (manifest_file , 'r' ) as manifest_file :
396399 for line in manifest_file :
397400 line = line .strip ()
398401
@@ -410,13 +413,11 @@ def _load_manifests(self):
410413 entry_path = os .path .normpath (entry [1 ].lstrip ("*" ))
411414 entry_path = _decode_filename (entry_path )
412415
413- if self .entries . has_key ( entry_path ) :
416+ if entry_path in self .entries :
414417 self .entries [entry_path ][alg ] = entry_hash
415418 else :
416419 self .entries [entry_path ] = {}
417420 self .entries [entry_path ][alg ] = entry_hash
418- finally :
419- manifest_file .close ()
420421
421422 def _validate_structure (self ):
422423 """Checks the structure of the bag, determining if it conforms to the
@@ -461,8 +462,8 @@ def _validate_oxum(self):
461462 if not byte_count .isdigit () or not file_count .isdigit ():
462463 raise BagError ("Invalid oxum: %s" % oxum )
463464
464- byte_count = long (byte_count )
465- file_count = long (file_count )
465+ byte_count = int (byte_count )
466+ file_count = int (file_count )
466467 total_bytes = 0
467468 total_files = 0
468469
@@ -511,11 +512,11 @@ def _validate_entries(self, processes):
511512 def _init_worker ():
512513 signal .signal (signal .SIGINT , signal .SIG_IGN )
513514
514- args = ((self .path , rel_path , hashes , available_hashers ) for rel_path , hashes in self .entries .items ())
515+ args = ((self .path , rel_path , hashes , available_hashers ) for rel_path , hashes in list ( self .entries .items () ))
515516
516517 try :
517518 if processes == 1 :
518- hash_results = map (_calc_hashes , args )
519+ hash_results = list ( map (_calc_hashes , args ) )
519520 else :
520521 try :
521522 pool = multiprocessing .Pool (processes if processes else None , _init_worker )
@@ -532,7 +533,7 @@ def _init_worker():
532533 raise
533534
534535 for rel_path , f_hashes , hashes in hash_results :
535- for alg , computed_hash in f_hashes .items ():
536+ for alg , computed_hash in list ( f_hashes .items () ):
536537 stored_hash = hashes [alg ]
537538 if stored_hash .lower () != computed_hash :
538539 e = ChecksumMismatch (rel_path , alg , stored_hash .lower (), computed_hash )
@@ -547,10 +548,10 @@ def _validate_bagittxt(self):
547548 Verify that bagit.txt conforms to specification
548549 """
549550 bagit_file_path = os .path .join (self .path , "bagit.txt" )
550- bagit_file = open (bagit_file_path , 'rb ' )
551+ bagit_file = open (bagit_file_path , 'r ' )
551552 try :
552553 first_line = bagit_file .readline ()
553- if first_line .startswith (codecs . BOM_UTF8 ):
554+ if first_line .startswith (BOM ):
554555 raise BagValidationError ("bagit.txt must not contain a byte-order mark" )
555556 finally :
556557 bagit_file .close ()
@@ -591,7 +592,9 @@ def __str__(self):
591592 return "%s exists on filesystem but is not in manifest" % self .path
592593
593594
594- def _calc_hashes ((base_path , rel_path , hashes , available_hashes )):
595+ def _calc_hashes (args ):
596+ # auto unpacking of sequences illegal in Python3
597+ (base_path , rel_path , hashes , available_hashes ) = args
595598 full_path = os .path .join (base_path , rel_path )
596599
597600 # Create a clone of the default empty hash objects:
@@ -601,9 +604,9 @@ def _calc_hashes((base_path, rel_path, hashes, available_hashes)):
601604
602605 try :
603606 f_hashes = _calculate_file_hashes (full_path , f_hashers )
604- except BagValidationError , e :
607+ except BagValidationError as e :
605608 f_hashes = dict (
606- (alg , str (e )) for alg in f_hashers .keys ()
609+ (alg , str (e )) for alg in list ( f_hashers .keys () )
607610 )
608611
609612 return rel_path , f_hashes , hashes
@@ -624,11 +627,11 @@ def _calculate_file_hashes(full_path, f_hashers):
624627 block = f .read (1048576 )
625628 if not block :
626629 break
627- for i in f_hashers .values ():
630+ for i in list ( f_hashers .values () ):
628631 i .update (block )
629- except IOError , e :
632+ except IOError as e :
630633 raise BagValidationError ("could not read %s: %s" % (full_path , str (e )))
631- except OSError , e :
634+ except OSError as e :
632635 raise BagValidationError ("could not read %s: %s" % (full_path , str (e )))
633636 finally :
634637 try :
@@ -637,12 +640,12 @@ def _calculate_file_hashes(full_path, f_hashers):
637640 pass
638641
639642 return dict (
640- (alg , h .hexdigest ()) for alg , h in f_hashers .items ()
643+ (alg , h .hexdigest ()) for alg , h in list ( f_hashers .items () )
641644 )
642645
643646
644647def _load_tag_file (tag_file_name ):
645- tag_file = codecs . open (tag_file_name , 'r' , 'utf-8-sig ' )
648+ tag_file = open (tag_file_name , 'r' )
646649
647650 try :
648651 # Store duplicate tags as list of vals
@@ -676,7 +679,11 @@ def _parse_tags(file):
676679
677680 # Line folding is handled by yielding values only after we encounter
678681 # the start of a new tag, or if we pass the EOF.
679- for line in file :
682+ for num , line in enumerate (file ):
683+ # If byte-order mark ignore it for now.
684+ if num == 0 :
685+ if line .startswith (BOM ):
686+ line = line .lstrip (BOM )
680687 # Skip over any empty or blank lines.
681688 if len (line ) == 0 or line .isspace ():
682689 continue
@@ -700,9 +707,9 @@ def _parse_tags(file):
700707
701708
702709def _make_tag_file (bag_info_path , bag_info ):
703- headers = bag_info .keys ()
710+ headers = list ( bag_info .keys () )
704711 headers .sort ()
705- with codecs . open (bag_info_path , 'w' , 'utf-8 ' ) as f :
712+ with open (bag_info_path , 'w' ) as f :
706713 for h in headers :
707714 if type (bag_info [h ]) == list :
708715 for val in bag_info [h ]:
@@ -738,18 +745,18 @@ def _make_manifest(manifest_file, data_dir, processes, algorithm='md5'):
738745 pool .close ()
739746 pool .join ()
740747 else :
741- checksums = map (manifest_line , _walk (data_dir ))
748+ checksums = list ( map (manifest_line , _walk (data_dir ) ))
742749
743- manifest = open (manifest_file , 'wb' )
744- num_files = 0
745- total_bytes = 0
750+ with open (manifest_file , 'w' ) as manifest :
751+ num_files = 0
752+ total_bytes = 0
746753
747- for digest , filename , bytes in checksums :
748- num_files += 1
749- total_bytes += bytes
750- manifest .write ("%s %s\n " % (digest , _encode_filename (filename )))
751- manifest .close ()
752- return "%s.%s" % (total_bytes , num_files )
754+ for digest , filename , bytes in checksums :
755+ num_files += 1
756+ total_bytes += bytes
757+ manifest .write ("%s %s\n " % (digest , _encode_filename (filename )))
758+ manifest .close ()
759+ return "%s.%s" % (total_bytes , num_files )
753760
754761
755762def _make_tagmanifest_file (tagmanifest_file , bag_dir ):
@@ -768,10 +775,9 @@ def _make_tagmanifest_file(tagmanifest_file, bag_dir):
768775 checksums .append ((m .hexdigest (), f ))
769776 fh .close ()
770777
771- tagmanifest = open (join (bag_dir , tagmanifest_file ), 'wb' )
772- for digest , filename in checksums :
773- tagmanifest .write ('%s %s\n ' % (digest , filename ))
774- tagmanifest .close ()
778+ with open (join (bag_dir , tagmanifest_file ), 'w' ) as tagmanifest :
779+ for digest , filename in checksums :
780+ tagmanifest .write ('%s %s\n ' % (digest , filename ))
775781
776782
777783def _walk (data_dir ):
@@ -928,7 +934,7 @@ def _configure_logging(opts):
928934 log .info ("%s valid according to Payload-Oxum" , bag_dir )
929935 else :
930936 log .info ("%s is valid" , bag_dir )
931- except BagError , e :
937+ except BagError as e :
932938 log .info ("%s is invalid: %s" , bag_dir , e )
933939 rc = 1
934940
0 commit comments