Skip to content

Commit c136712

Browse files
committed
Prefer libarchive for Rar extraction
Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent aa3ea61 commit c136712

File tree

6 files changed

+31
-13
lines changed

6 files changed

+31
-13
lines changed

src/extractcode/archive.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def try_to_extract(location, target_dir, extractor):
411411
extract_springboot = functools.partial(try_to_extract, extractor=extract_zip)
412412

413413
extract_iso = sevenzip.extract
414-
extract_rar = sevenzip.extract
414+
extract_rar = libarchive2.extract
415415
extract_rpm = sevenzip.extract
416416
extract_xz = sevenzip.extract
417417
extract_lzma = sevenzip.extract
@@ -703,7 +703,7 @@ def try_to_extract(location, target_dir, extractor):
703703
extensions=('.rar',),
704704
kind=regular,
705705
extractors=[extract_rar],
706-
strict=False
706+
strict=True
707707
)
708708

709709
CabHandler = Handler(
-134 KB
Binary file not shown.
-96 KB
Binary file not shown.
-94.5 KB
Binary file not shown.

src/extractcode/sevenzip.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,17 @@ def list_extracted_7z_files(stdout):
114114
return get_file_list(stdout)
115115

116116

117+
def is_rar(location):
    """
    Return True if the file at `location` is a RAR archive, False otherwise.

    A `location` that does not exist on disk is reported as not being a RAR
    archive.
    """
    if not os.path.exists(location):
        # Return an explicit boolean instead of an implicit None so that the
        # result always matches the documented True/False contract.
        return False
    # Imported at call time (not at module level), as in the original code.
    from typecode import contenttype
    file_type = contenttype.get_type(location)
    # Case-insensitive prefix match on the detected file type description.
    return file_type.filetype_file.lower().startswith('rar archive')
126+
127+
117128
def extract(location, target_dir, arch_type='*'):
118129
"""
119130
Extract all files from a 7zip-supported archive file at location in the
@@ -128,6 +139,9 @@ def extract(location, target_dir, arch_type='*'):
128139
abs_location = os.path.abspath(os.path.expanduser(location))
129140
abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
130141

142+
if is_rar(location):
143+
raise ExtractErrorFailedToExtract('RAR extraction disactivated')
144+
131145
# note: there are some issues with the extraction of debian .deb ar files
132146
# see sevenzip bug http://sourceforge.net/p/sevenzip/bugs/1472/
133147

@@ -189,6 +203,10 @@ def list_entries(location, arch_type='*'):
189203
"""
190204
assert location
191205
abs_location = os.path.abspath(os.path.expanduser(location))
206+
207+
if is_rar(location):
208+
return []
209+
192210
# 7z arguments
193211
listing = 'l'
194212

tests/extractcode/test_archive.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1596,7 +1596,7 @@ def test_extract_rar_with_trailing_data(self):
15961596
def test_extract_rar_broken(self):
15971597
test_file = self.get_test_loc('archive/rar/broken.rar')
15981598
test_dir = self.get_temp_dir()
1599-
expected = Exception('Unknown extraction error')
1599+
expected = Exception('Header CRC error')
16001600
self.assertRaisesInstance(expected, archive.extract_rar, test_file, test_dir)
16011601

16021602
def test_extract_rar_with_relative_path(self):
@@ -1627,21 +1627,19 @@ def test_extract_rar_with_absolute_path(self):
16271627
def test_extract_rar_with_password(self):
16281628
test_file = self.get_test_loc('archive/rar/rar_password.rar')
16291629
test_dir = self.get_temp_dir()
1630-
expected = Exception('Password protected archive, unable to extract')
1630+
expected = Exception('RAR encryption support unavailable.')
16311631
self.assertRaisesInstance(expected, archive.extract_rar,
16321632
test_file, test_dir)
16331633

16341634
def test_extract_rar_with_non_ascii_path(self):
16351635
test_file = self.get_test_loc('archive/rar/non_ascii_corrupted.rar')
1636-
# The bug only occurs if the path was given as Unicode !
1636+
# The bug only occurs if the path was given as Unicode
16371637
test_file = unicode(test_file)
16381638
test_dir = self.get_temp_dir()
16391639
# raises an exception but still extracts some files
1640-
expected = Exception('Unknown extraction error')
1641-
self.assertRaisesInstance(expected, archive.extract_rar,
1642-
test_file, test_dir)
1643-
result = os.path.join(test_dir, 'EdoProject_java/WebContent'
1644-
'/WEB-INF/lib/cos.jar')
1640+
expected = Exception('Prefix found')
1641+
self.assertRaisesInstance(expected, archive.extract_rar, test_file, test_dir)
1642+
result = os.path.join(test_dir, 'EdoProject_java/WebContent/WEB-INF/lib/cos.jar')
16451643
assert os.path.exists(result)
16461644

16471645

@@ -2024,21 +2022,22 @@ class TestCbr(BaseArchiveTestCase):
20242022
def test_get_extractor_cbr(self):
20252023
test_file = self.get_test_loc('archive/cbr/t.cbr')
20262024
result = archive.get_extractor(test_file)
2027-
expected = archive.extract_rar
2025+
# we do not handle these rare extensions (this is a RAR)
2026+
expected = None # archive.extract_rar
20282027
assert expected == result
20292028

20302029
def test_extract_cbr_basic(self):
20312030
test_file = self.get_test_loc('archive/cbr/t.cbr')
20322031
test_dir = self.get_temp_dir()
2033-
archive.extract_cab(test_file, test_dir)
2032+
libarchive2.extract(test_file, test_dir)
20342033
extracted = self.collect_extracted_path(test_dir)
20352034
expected = ['/t/', '/t/t.txt']
20362035
assert expected == extracted
20372036

20382037
def test_extract_cbr_basic_with_weird_filename_extension(self):
20392038
test_file = self.get_test_loc('archive/cbr/t.cbr.foo')
20402039
test_dir = self.get_temp_dir()
2041-
archive.extract_cab(test_file, test_dir)
2040+
libarchive2.extract(test_file, test_dir)
20422041
extracted = self.collect_extracted_path(test_dir)
20432042
expected = ['/t/', '/t/t.txt']
20442043
assert expected == extracted
@@ -2300,6 +2299,7 @@ def test_extract_iso_with_weird_filenames_with_sevenzip(self):
23002299
test_file = self.get_test_loc('archive/weird_names/weird_names.iso')
23012300
self.check_extract(sevenzip.extract, test_file, expected_warnings=[], expected_suffix='7zip')
23022301

2302+
@expectedFailure # not a problem: we now use libarchive for these
23032303
def test_extract_rar_with_weird_filenames_with_sevenzip(self):
23042304
test_file = self.get_test_loc('archive/weird_names/weird_names.rar')
23052305
self.check_extract(sevenzip.extract, test_file, expected_warnings=[], expected_suffix='7zip')

0 commit comments

Comments
 (0)