Skip to content

Commit 24872a9

Browse files
committed
Now works with Python 2.7
1 parent 11a1f07 commit 24872a9

File tree

2 files changed

+15
-9
lines changed

2 files changed

+15
-9
lines changed

tika/tests/tests_unpack.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,25 @@
1+
# coding=utf8
2+
13
import unittest
24
from tempfile import NamedTemporaryFile
35
from tika import unpack
46

57

68
class CreateTest(unittest.TestCase):
79
"Test different encodings"
8-
text_utf8 = "Hello, world!! 😎 👽"
9-
text_ascii = "Hello, world!!"
10+
text_utf8 = u"Hello, world!! 😎 👽"
11+
text_ascii = u"Hello, world!!"
1012

1113
def test_utf8(self):
12-
with NamedTemporaryFile("w+t", prefix='tika-python', suffix='.txt', dir='/tmp', encoding="utf8") as f:
13-
f.write(self.text_utf8)
14+
with NamedTemporaryFile("w+b", prefix='tika-python', suffix='.txt', dir='/tmp') as f:
15+
f.write(self.text_utf8.encode("utf8"))
1416
f.flush()
1517
f.seek(0)
1618
parsed = unpack.from_file(f.name)
1719
self.assertEqual(parsed["content"].strip(), self.text_utf8)
1820

1921
def test_ascii(self):
20-
with NamedTemporaryFile("w+t", prefix='tika-python', suffix='.txt', dir='/tmp', encoding="utf8") as f:
22+
with NamedTemporaryFile("w+t", prefix='tika-python', suffix='.txt', dir='/tmp') as f:
2123
f.write(self.text_ascii)
2224
f.flush()
2325
f.seek(0)

tika/unpack.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
# tarfile returned object can be used as is in earlier versions.
2929
_text_wrapper = TextIOWrapper if version_info.major >= 3 else lambda x: x
3030

31+
3132
def from_file(filename, serverEndpoint=ServerEndpoint):
3233
'''
3334
Parse from file
@@ -36,7 +37,7 @@ def from_file(filename, serverEndpoint=ServerEndpoint):
3637
:return:
3738
'''
3839
tarOutput = parse1('unpack', filename, serverEndpoint,
39-
responseMimeType='application/x-tar',
40+
responseMimeType='application/x-tar',
4041
services={'meta': '/meta', 'text': '/tika',
4142
'all': '/rmeta/xml', 'unpack': '/unpack/all'},
4243
rawResponse=True)
@@ -52,8 +53,8 @@ def from_buffer(string, serverEndpoint=ServerEndpoint):
5253
'''
5354
status, response = callServer('put', serverEndpoint, '/unpack/all', string,
5455
{'Accept': 'application/x-tar'}, False,
55-
rawResponse=True)
56-
56+
rawResponse=True)
57+
5758
return _parse((status, response))
5859

5960

@@ -96,7 +97,10 @@ def _parse(tarOutput):
9697

9798
contentMember = tarFile.getmember("__TEXT__")
9899
if not contentMember.issym() and contentMember.isfile():
99-
content = _text_wrapper(tarFile.extractfile(contentMember), encoding='utf8').read()
100+
if version_info.major >= 3:
101+
content = _text_wrapper(tarFile.extractfile(contentMember), encoding='utf8').read()
102+
else:
103+
content = tarFile.extractfile(contentMember).read().decode('utf8')
100104

101105
# get the remaining files as attachments
102106
attachments = {}

0 commit comments

Comments
 (0)