Skip to content

Commit e268f83

Browse files
committed
small bugfix
1 parent e8a3bfe commit e268f83

File tree

2 files changed

+9
-1
lines changed

2 files changed

+9
-1
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@
99
/tika/*/*.pyc
1010
.DS_Store
1111
/setup.cfg
12+
.idea
13+

tika/unpack.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def _parse(tarOutput):
7878
metadataMember = tarFile.getmember("__METADATA__")
7979
if not metadataMember.issym() and metadataMember.isfile():
8080
metadataFile = _text_wrapper(tarFile.extractfile(metadataMember))
81-
metadataReader = csv.reader(metadataFile)
81+
metadataReader = csv.reader(_truncate_nulls(metadataFile))
8282
for metadataLine in metadataReader:
8383
# each metadata line comes as a key-value pair, with list values
8484
# returned as extra values in the line - convert single values
@@ -114,3 +114,9 @@ def _parse(tarOutput):
114114
parsed["attachments"] = attachments
115115

116116
return parsed
117+
118+
119+
# TODO: Remove this NUL-stripping workaround if/when TIKA-3070 is fixed upstream: https://issues.apache.org/jira/browse/TIKA-3070
120+
def _truncate_nulls(s):
121+
for line in s:
122+
yield line.replace('\0', '')

0 commit comments

Comments
 (0)