Skip to content

Commit 51c2851

Browse files
committed
use codecs when reading/writing tag files; fixes #20
1 parent 473d3f2 commit 51c2851

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

bagit.py

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ def make_bag(bag_dir, bag_info=None, processes=1, checksum=None):
145145

146146
except Exception, e:
147147
os.chdir(old_dir)
148-
logging.error(e)
148+
logging.exception(e)
149149
raise e
150150

151151
os.chdir(old_dir)
@@ -631,7 +631,7 @@ def _calculate_file_hashes(full_path, f_hashers):
631631

632632

633633
def _load_tag_file(tag_file_name):
634-
tag_file = open(tag_file_name, 'rb')
634+
tag_file = codecs.open(tag_file_name, 'r', 'utf-8-sig')
635635

636636
try:
637637
# Store duplicate tags as list of vals
@@ -665,17 +665,11 @@ def _parse_tags(file):
665665

666666
# Line folding is handled by yielding values only after we encounter
667667
# the start of a new tag, or if we pass the EOF.
668-
for num, line in enumerate(file):
669-
# If byte-order mark ignore it for now.
670-
if 0 == num:
671-
if line.startswith(codecs.BOM_UTF8):
672-
line = line.lstrip(codecs.BOM_UTF8)
673-
668+
for line in file:
674669
# Skip over any empty or blank lines.
675670
if len(line) == 0 or line.isspace():
676671
continue
677-
678-
if line[0].isspace() and tag_value != None : # folded line
672+
elif line[0].isspace() and tag_value != None : # folded line
679673
tag_value += line
680674
else:
681675
# Starting a new tag; yield the last one.
@@ -697,7 +691,7 @@ def _parse_tags(file):
697691
def _make_tag_file(bag_info_path, bag_info):
698692
headers = bag_info.keys()
699693
headers.sort()
700-
with open(bag_info_path, 'wb') as f:
694+
with codecs.open(bag_info_path, 'w', 'utf-8') as f:
701695
for h in headers:
702696
if type(bag_info[h]) == list:
703697
for val in bag_info[h]:

test.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
# -*- coding: utf-8 -*-
2+
13
import os
24
import stat
35
import bagit
@@ -13,7 +15,7 @@
1315

1416

1517
# don't let < ERROR clutter up test output
16-
logging.basicConfig(level=logging.ERROR)
18+
logging.basicConfig(filename="test.log", level=logging.DEBUG)
1719

1820

1921
class TestSingleProcessValidation(unittest.TestCase):
@@ -464,5 +466,10 @@ def test_make_bag_with_newline(self):
464466
bag = bagit.make_bag(self.tmpdir, {"test": "foo\nbar"})
465467
self.assertEqual(bag.info["test"], "foobar")
466468

469+
def test_unicode_in_tags(self):
470+
bag = bagit.make_bag(self.tmpdir, {"test": u'♡'})
471+
bag = bagit.Bag(self.tmpdir)
472+
self.assertEqual(bag.info['test'], u'♡')
473+
467474
if __name__ == '__main__':
468475
unittest.main()

0 commit comments

Comments
 (0)