Skip to content

Commit 6ee6d7b

Browse files
committed
Merge pull request #30 from joshuago78/python2and3
Python 2 & 3 compatibility
2 parents 4d6d8ff + 2570e3f commit 6ee6d7b

File tree

6 files changed: 170 additions (+170) and 125 deletions (-125).

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,9 @@ language: python
22
python:
33
- "2.7"
44
- "2.6"
5+
- "3.2"
6+
- "3.3"
7+
- "3.4"
58
script: python setup.py test
69
install:
710
# this can go away when this is resolved satisfactorily

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ needed or you can install globally with:
1313

1414
pip install bagit
1515

16-
Python v2.4+ is required.
16+
Python v2.6+ is required.
1717

1818
Command Line Usage
1919
------------------

bagit.py

Lines changed: 54 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,10 @@
6969

7070
checksum_algos = ['md5', 'sha1', 'sha256', 'sha512']
7171

72+
BOM = codecs.BOM_UTF8
73+
if sys.version_info[0] >= 3:
74+
BOM = BOM.decode('utf-8')
75+
7276

7377
def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
7478
"""
@@ -127,7 +131,8 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
127131

128132
logging.info("writing bagit.txt")
129133
txt = """BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8\n"""
130-
open("bagit.txt", "wb").write(txt)
134+
with open("bagit.txt", "w") as bagit_file:
135+
bagit_file.write(txt)
131136

132137
logging.info("writing bag-info.txt")
133138
if bag_info is None:
@@ -143,7 +148,7 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
143148

144149
_make_tagmanifest_file('tagmanifest-md5.txt', bag_dir)
145150

146-
except Exception, e:
151+
except Exception as e:
147152
os.chdir(old_dir)
148153
logging.exception(e)
149154
raise e
@@ -188,7 +193,7 @@ def _open(self):
188193
try:
189194
self.version = tags["BagIt-Version"]
190195
self.encoding = tags["Tag-File-Character-Encoding"]
191-
except KeyError, e:
196+
except KeyError as e:
192197
raise BagError("Missing required tag in bagit.txt: %s" % e)
193198

194199
if self.version in ["0.93", "0.94", "0.95"]:
@@ -253,7 +258,7 @@ def payload_files(self):
253258

254259
def payload_entries(self):
255260
# Don't use dict comprehension (compatibility with Python < 2.7)
256-
return dict((key, value) for (key, value) in self.entries.iteritems() \
261+
return dict((key, value) for (key, value) in self.entries.items() \
257262
if key.startswith("data" + os.sep))
258263

259264
def save(self, processes=1, manifests=False):
@@ -314,7 +319,7 @@ def save(self, processes=1, manifests=False):
314319
os.chdir(old_dir)
315320

316321
def tagfile_entries(self):
317-
return dict((key, value) for (key, value) in self.entries.iteritems() \
322+
return dict((key, value) for (key, value) in self.entries.items() \
318323
if not key.startswith("data" + os.sep))
319324

320325
def missing_optional_tagfiles(self):
@@ -325,7 +330,7 @@ def missing_optional_tagfiles(self):
325330
only check for entries with missing files (not missing
326331
entries for existing files).
327332
"""
328-
for tagfilepath in self.tagfile_entries().keys():
333+
for tagfilepath in list(self.tagfile_entries().keys()):
329334
if not os.path.isfile(os.path.join(self.path, tagfilepath)):
330335
yield tagfilepath
331336

@@ -339,7 +344,7 @@ def fetch_entries(self):
339344
for line in fetch_file:
340345
parts = line.strip().split(None, 2)
341346
yield (parts[0], parts[1], parts[2])
342-
except Exception, e:
347+
except Exception as e:
343348
fetch_file.close()
344349
raise e
345350

@@ -350,7 +355,7 @@ def files_to_be_fetched(self):
350355
yield f
351356

352357
def has_oxum(self):
353-
return self.info.has_key('Payload-Oxum')
358+
return 'Payload-Oxum' in self.info
354359

355360
def validate(self, processes=1, fast=False):
356361
"""Checks the structure and contents are valid. If you supply
@@ -371,7 +376,7 @@ def is_valid(self, fast=False):
371376
"""
372377
try:
373378
self.validate(fast=fast)
374-
except BagError, e:
379+
except BagError as e:
375380
return False
376381
return True
377382

@@ -390,9 +395,7 @@ def _load_manifests(self):
390395
alg = os.path.basename(manifest_file).replace(search, "").replace(".txt", "")
391396
self.algs.append(alg)
392397

393-
manifest_file = open(manifest_file, 'rb')
394-
395-
try:
398+
with open(manifest_file, 'r') as manifest_file:
396399
for line in manifest_file:
397400
line = line.strip()
398401

@@ -410,13 +413,11 @@ def _load_manifests(self):
410413
entry_path = os.path.normpath(entry[1].lstrip("*"))
411414
entry_path = _decode_filename(entry_path)
412415

413-
if self.entries.has_key(entry_path):
416+
if entry_path in self.entries:
414417
self.entries[entry_path][alg] = entry_hash
415418
else:
416419
self.entries[entry_path] = {}
417420
self.entries[entry_path][alg] = entry_hash
418-
finally:
419-
manifest_file.close()
420421

421422
def _validate_structure(self):
422423
"""Checks the structure of the bag, determining if it conforms to the
@@ -461,8 +462,8 @@ def _validate_oxum(self):
461462
if not byte_count.isdigit() or not file_count.isdigit():
462463
raise BagError("Invalid oxum: %s" % oxum)
463464

464-
byte_count = long(byte_count)
465-
file_count = long(file_count)
465+
byte_count = int(byte_count)
466+
file_count = int(file_count)
466467
total_bytes = 0
467468
total_files = 0
468469

@@ -511,11 +512,11 @@ def _validate_entries(self, processes):
511512
def _init_worker():
512513
signal.signal(signal.SIGINT, signal.SIG_IGN)
513514

514-
args = ((self.path, rel_path, hashes, available_hashers) for rel_path, hashes in self.entries.items())
515+
args = ((self.path, rel_path, hashes, available_hashers) for rel_path, hashes in list(self.entries.items()))
515516

516517
try:
517518
if processes == 1:
518-
hash_results = map(_calc_hashes, args)
519+
hash_results = list(map(_calc_hashes, args))
519520
else:
520521
try:
521522
pool = multiprocessing.Pool(processes if processes else None, _init_worker)
@@ -532,7 +533,7 @@ def _init_worker():
532533
raise
533534

534535
for rel_path, f_hashes, hashes in hash_results:
535-
for alg, computed_hash in f_hashes.items():
536+
for alg, computed_hash in list(f_hashes.items()):
536537
stored_hash = hashes[alg]
537538
if stored_hash.lower() != computed_hash:
538539
e = ChecksumMismatch(rel_path, alg, stored_hash.lower(), computed_hash)
@@ -547,10 +548,10 @@ def _validate_bagittxt(self):
547548
Verify that bagit.txt conforms to specification
548549
"""
549550
bagit_file_path = os.path.join(self.path, "bagit.txt")
550-
bagit_file = open(bagit_file_path, 'rb')
551+
bagit_file = open(bagit_file_path, 'r')
551552
try:
552553
first_line = bagit_file.readline()
553-
if first_line.startswith(codecs.BOM_UTF8):
554+
if first_line.startswith(BOM):
554555
raise BagValidationError("bagit.txt must not contain a byte-order mark")
555556
finally:
556557
bagit_file.close()
@@ -591,7 +592,9 @@ def __str__(self):
591592
return "%s exists on filesystem but is not in manifest" % self.path
592593

593594

594-
def _calc_hashes((base_path, rel_path, hashes, available_hashes)):
595+
def _calc_hashes(args):
596+
# auto unpacking of sequences illegal in Python3
597+
(base_path, rel_path, hashes, available_hashes) = args
595598
full_path = os.path.join(base_path, rel_path)
596599

597600
# Create a clone of the default empty hash objects:
@@ -601,9 +604,9 @@ def _calc_hashes((base_path, rel_path, hashes, available_hashes)):
601604

602605
try:
603606
f_hashes = _calculate_file_hashes(full_path, f_hashers)
604-
except BagValidationError, e:
607+
except BagValidationError as e:
605608
f_hashes = dict(
606-
(alg, str(e)) for alg in f_hashers.keys()
609+
(alg, str(e)) for alg in list(f_hashers.keys())
607610
)
608611

609612
return rel_path, f_hashes, hashes
@@ -624,11 +627,11 @@ def _calculate_file_hashes(full_path, f_hashers):
624627
block = f.read(1048576)
625628
if not block:
626629
break
627-
for i in f_hashers.values():
630+
for i in list(f_hashers.values()):
628631
i.update(block)
629-
except IOError, e:
632+
except IOError as e:
630633
raise BagValidationError("could not read %s: %s" % (full_path, str(e)))
631-
except OSError, e:
634+
except OSError as e:
632635
raise BagValidationError("could not read %s: %s" % (full_path, str(e)))
633636
finally:
634637
try:
@@ -637,12 +640,12 @@ def _calculate_file_hashes(full_path, f_hashers):
637640
pass
638641

639642
return dict(
640-
(alg, h.hexdigest()) for alg, h in f_hashers.items()
643+
(alg, h.hexdigest()) for alg, h in list(f_hashers.items())
641644
)
642645

643646

644647
def _load_tag_file(tag_file_name):
645-
tag_file = codecs.open(tag_file_name, 'r', 'utf-8-sig')
648+
tag_file = open(tag_file_name, 'r')
646649

647650
try:
648651
# Store duplicate tags as list of vals
@@ -676,7 +679,11 @@ def _parse_tags(file):
676679

677680
# Line folding is handled by yielding values only after we encounter
678681
# the start of a new tag, or if we pass the EOF.
679-
for line in file:
682+
for num, line in enumerate(file):
683+
# If byte-order mark ignore it for now.
684+
if num == 0:
685+
if line.startswith(BOM):
686+
line = line.lstrip(BOM)
680687
# Skip over any empty or blank lines.
681688
if len(line) == 0 or line.isspace():
682689
continue
@@ -700,9 +707,9 @@ def _parse_tags(file):
700707

701708

702709
def _make_tag_file(bag_info_path, bag_info):
703-
headers = bag_info.keys()
710+
headers = list(bag_info.keys())
704711
headers.sort()
705-
with codecs.open(bag_info_path, 'w', 'utf-8') as f:
712+
with open(bag_info_path, 'w') as f:
706713
for h in headers:
707714
if type(bag_info[h]) == list:
708715
for val in bag_info[h]:
@@ -738,18 +745,18 @@ def _make_manifest(manifest_file, data_dir, processes, algorithm='md5'):
738745
pool.close()
739746
pool.join()
740747
else:
741-
checksums = map(manifest_line, _walk(data_dir))
748+
checksums = list(map(manifest_line, _walk(data_dir)))
742749

743-
manifest = open(manifest_file, 'wb')
744-
num_files = 0
745-
total_bytes = 0
750+
with open(manifest_file, 'w') as manifest:
751+
num_files = 0
752+
total_bytes = 0
746753

747-
for digest, filename, bytes in checksums:
748-
num_files += 1
749-
total_bytes += bytes
750-
manifest.write("%s %s\n" % (digest, _encode_filename(filename)))
751-
manifest.close()
752-
return "%s.%s" % (total_bytes, num_files)
754+
for digest, filename, bytes in checksums:
755+
num_files += 1
756+
total_bytes += bytes
757+
manifest.write("%s %s\n" % (digest, _encode_filename(filename)))
758+
manifest.close()
759+
return "%s.%s" % (total_bytes, num_files)
753760

754761

755762
def _make_tagmanifest_file(tagmanifest_file, bag_dir):
@@ -768,10 +775,9 @@ def _make_tagmanifest_file(tagmanifest_file, bag_dir):
768775
checksums.append((m.hexdigest(), f))
769776
fh.close()
770777

771-
tagmanifest = open(join(bag_dir, tagmanifest_file), 'wb')
772-
for digest, filename in checksums:
773-
tagmanifest.write('%s %s\n' % (digest, filename))
774-
tagmanifest.close()
778+
with open(join(bag_dir, tagmanifest_file), 'w') as tagmanifest:
779+
for digest, filename in checksums:
780+
tagmanifest.write('%s %s\n' % (digest, filename))
775781

776782

777783
def _walk(data_dir):
@@ -928,7 +934,7 @@ def _configure_logging(opts):
928934
log.info("%s valid according to Payload-Oxum", bag_dir)
929935
else:
930936
log.info("%s is valid", bag_dir)
931-
except BagError, e:
937+
except BagError as e:
932938
log.info("%s is invalid: %s", bag_dir, e)
933939
rc = 1
934940

bench.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
# fetch some images from NASA to bag up
1515

1616
if not os.path.isdir('bench-data'):
17-
print "fetching some images to bag up from nasa"
17+
print("fetching some images to bag up from nasa")
1818
os.mkdir('bench-data')
1919
ftp = ftplib.FTP('nssdcftp.gsfc.nasa.gov')
2020
ftp.login()
@@ -24,7 +24,7 @@
2424
ftp.retrlines('NLST', files.append)
2525

2626
for file in files:
27-
print "fetching %s" % file
27+
print(("fetching %s" % file))
2828
fh = open(os.path.join('bench-data', file), 'wb')
2929
ftp.retrbinary('RETR %s' % file, fh.write)
3030
fh.close()
@@ -46,7 +46,7 @@
4646

4747
for p in range(1, 9):
4848
t = timeit.Timer(statement % p)
49-
print "create w/ %s processes: %.2f seconds " % (p, (10 * t.timeit(number=10) / 10))
49+
print(("create w/ %s processes: %.2f seconds " % (p, (10 * t.timeit(number=10) / 10))))
5050

5151

5252
# validate a bag with 1-8 processes
@@ -67,6 +67,6 @@
6767
# try 1-8 parallel processes
6868
for p in range(1, 9):
6969
t = timeit.Timer(statement % p)
70-
print "validate w/ %s processes: %.2f seconds " % (p, (10 * t.timeit(number=10) / 10))
70+
print(("validate w/ %s processes: %.2f seconds " % (p, (10 * t.timeit(number=10) / 10))))
7171

7272
shutil.rmtree('bench-data-bag')

setup.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
from sys import version, exit
22
from setuptools import setup
33

4-
if version < '2.4.0' or version > '3.0.0':
5-
print "python 2.4 - 2.7 is required"
4+
if version < '2.6.0':
5+
print("python 2.6 or higher is required")
66
exit(1)
77

88
description = \
@@ -42,9 +42,11 @@
4242
'Topic :: Communications :: File Sharing',
4343
'Topic :: Software Development :: Libraries :: Python Modules',
4444
'Topic :: System :: Filesystems',
45-
'Programming Language :: Python :: 2.4',
46-
'Programming Language :: Python :: 2.5',
4745
'Programming Language :: Python :: 2.6',
48-
'Programming Language :: Python :: 2.7'
46+
'Programming Language :: Python :: 2.7',
47+
'Programming Language :: Python :: 3.1',
48+
'Programming Language :: Python :: 3.2',
49+
'Programming Language :: Python :: 3.3',
50+
'Programming Language :: Python :: 3.4',
4951
],
5052
)

0 commit comments

Comments
 (0)