Skip to content

Commit f3a541b

Browse files
authored
Merge pull request #392 from nexB/387-apple-pkg-xar-archive-support
#387 Initial support to extract XAR archives and Apple pkg or mpkg
2 parents 1e3ade0 + 25eac5d commit f3a541b

File tree

5 files changed

+73
-31
lines changed

5 files changed

+73
-31
lines changed

src/extractcode/archive.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,13 @@
5151

5252

5353
logger = logging.getLogger(__name__)
54-
DEBUG = False
55-
DEBUG_DEEP = False
56-
# import sys
57-
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
58-
# logger.setLevel(logging.DEBUG)
54+
TRACE = False
55+
TRACE_DEEP = False
56+
57+
if TRACE:
58+
import sys
59+
logging.basicConfig(stream=sys.stdout)
60+
logger.setLevel(logging.DEBUG)
5961

6062

6163

@@ -176,15 +178,16 @@ def get_handlers(location):
176178
mime_matched = handler.mimetypes and any(m in mtype for m in handler.mimetypes)
177179
extension_matched = handler.extensions and location.lower().endswith(handler.extensions)
178180

179-
if DEBUG_DEEP:
180-
logger.debug('get_handlers: %(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals())
181+
if TRACE_DEEP:
182+
handler_name = handler.name
183+
logger.debug('get_handlers: considering %(handler_name)r handler for %(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals())
181184
logger.debug('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())
182185

183186
if handler.strict and not all([type_matched, mime_matched, extension_matched]):
184187
continue
185188

186189
if type_matched or mime_matched or extension_matched:
187-
if DEBUG_DEEP:
190+
if TRACE_DEEP:
188191
logger.debug('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())
189192
logger.debug('get_handlers: %(location)s: handler: %(handler)r' % locals())
190193
yield handler, type_matched, mime_matched, extension_matched
@@ -300,7 +303,7 @@ def extract_twice(location, target_dir, extractor1, extractor2):
300303
# extract first the intermediate payload to a temp dir
301304
temp_target = unicode(fileutils.get_temp_dir('extract'))
302305
warnings = extractor1(abs_location, temp_target)
303-
if DEBUG:
306+
if TRACE:
304307
logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())
305308

306309
# extract this intermediate payload to the final target_dir
@@ -310,7 +313,7 @@ def extract_twice(location, target_dir, extractor1, extractor2):
310313
warnings.append(location + ': No files found in archive.')
311314
else:
312315
for extracted1_loc in inner_archives:
313-
if DEBUG:
316+
if TRACE:
314317
logger.debug('extract_twice: extractor2: %(extracted1_loc)r' % locals())
315318
warnings.extend(extractor2(extracted1_loc, abs_target_dir))
316319
finally:
@@ -334,14 +337,14 @@ def extract_with_fallback(location, target_dir, extractor1, extractor2):
334337
temp_target1 = unicode(fileutils.get_temp_dir('extract1'))
335338
try:
336339
warnings = extractor1(abs_location, temp_target1)
337-
if DEBUG:
340+
if TRACE:
338341
logger.debug('extract_with_fallback: temp_target1: %(temp_target1)r' % locals())
339342
fileutils.copytree(temp_target1, abs_target_dir)
340343
except:
341344
try:
342345
temp_target2 = unicode(fileutils.get_temp_dir('extract2'))
343346
warnings = extractor2(abs_location, temp_target2)
344-
if DEBUG:
347+
if TRACE:
345348
logger.debug('extract_with_fallback: temp_target2: %(temp_target2)r' % locals())
346349
fileutils.copytree(temp_target2, abs_target_dir)
347350
finally:
@@ -375,6 +378,7 @@ def extract_with_fallback(location, target_dir, extractor1, extractor2):
375378
extract_nsis = sevenzip.extract
376379
extract_ishield = sevenzip.extract
377380
extract_Z = sevenzip.extract
381+
extract_xarpkg = sevenzip.extract
378382

379383

380384
# Archive handlers.
@@ -799,6 +803,26 @@ def extract_with_fallback(location, target_dir, extractor1, extractor2):
799803
strict=True
800804
)
801805

806+
ApplePkgHandler = Handler(
807+
name='Apple pkg or mpkg package installer',
808+
filetypes=('xar archive',),
809+
mimetypes=('application/octet-stream',),
810+
extensions=('.pkg', '.mpkg',),
811+
kind=package,
812+
extractors=[extract_xarpkg],
813+
strict=True
814+
)
815+
816+
XarHandler = Handler(
817+
name='Xar archive v1',
818+
filetypes=('xar archive',),
819+
mimetypes=('application/octet-stream',),
820+
extensions=('.xar',),
821+
kind=package,
822+
extractors=[extract_xarpkg],
823+
strict=True
824+
)
825+
802826
IsoImageHandler = Handler(
803827
name='ISO CD image',
804828
filetypes=('iso 9660 cd-rom', 'high sierra cd-rom',),
@@ -858,6 +882,8 @@ def extract_with_fallback(location, target_dir, extractor1, extractor2):
858882
RarHandler,
859883
CabHandler,
860884
MsiInstallerHandler,
885+
ApplePkgHandler,
886+
XarHandler,
861887
# notes: this may catch all exe and fails too often
862888
InstallShieldHandler,
863889
NSISInstallerHandler,

src/extractcode/extract.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,12 @@
3737
from extractcode import archive
3838

3939
logger = logging.getLogger(__name__)
40-
DEBUG = False
41-
# import sys
42-
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
43-
# logger.setLevel(logging.DEBUG)
40+
TRACE = False
41+
42+
if TRACE:
43+
import sys
44+
logging.basicConfig(stream=sys.stdout)
45+
logger.setLevel(logging.DEBUG)
4446

4547

4648
"""
@@ -118,46 +120,46 @@ def extract(location, kinds=extractcode.default_kinds, recurse=False):
118120
first before resuming the file system walk.
119121
"""
120122
ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
121-
if DEBUG:
123+
if TRACE:
122124
logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())
123125
abs_location = abspath(expanduser(location))
124126
for top, dirs, files in fileutils.walk(abs_location, ignored):
125-
if DEBUG:
127+
if TRACE:
126128
logger.debug('extract:walk: top: %(top)r dirs: %(dirs)r files: %(files)r' % locals())
127129

128130
if not recurse:
129-
if DEBUG:
131+
if TRACE:
130132
drs = set(dirs)
131133
for d in dirs[:]:
132134
if extractcode.is_extraction_path(d):
133135
dirs.remove(d)
134-
if DEBUG:
136+
if TRACE:
135137
logger.debug('extract:walk: not recurse: removed dirs:' + repr(drs.symmetric_difference(set(dirs))))
136138
for f in files:
137139
loc = join(top, f)
138140
if not recurse and extractcode.is_extraction_path(loc):
139-
if DEBUG:
141+
if TRACE:
140142
logger.debug('extract:walk not recurse: skipped file: %(loc)r' % locals())
141143
continue
142144

143145
if not archive.should_extract(loc, kinds):
144-
if DEBUG:
146+
if TRACE:
145147
logger.debug('extract:walk: skipped file: not should_extract: %(loc)r' % locals())
146148
continue
147149

148150
target = join(abspath(top), extractcode.get_extraction_path(loc))
149-
if DEBUG:
151+
if TRACE:
150152
logger.debug('extract:target: %(target)r' % locals())
151153
for xevent in extract_file(loc, target, kinds):
152-
if DEBUG:
154+
if TRACE:
153155
logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
154156
yield xevent
155157

156158
if recurse:
157-
if DEBUG:
159+
if TRACE:
158160
logger.debug('extract:walk: recursing on target: %(target)r' % locals())
159161
for xevent in extract(target, kinds, recurse):
160-
if DEBUG:
162+
if TRACE:
161163
logger.debug('extract:walk:recurse:extraction event: %(xevent)r' % locals())
162164
yield xevent
163165

@@ -170,8 +172,8 @@ def extract_file(location, target, kinds=extractcode.default_kinds):
170172
warnings = []
171173
errors = []
172174
extractor = archive.get_extractor(location, kinds)
173-
if DEBUG:
174-
logger.debug('extract_file: extractor: for: %(location)r with kinds: r(kinds)r : ' % locals()
175+
if TRACE:
176+
logger.debug('extract_file: extractor: for: %(location)r with kinds: %(kinds)r : ' % locals()
175177
+ getattr(extractor, '__module__', '')
176178
+ '.' + getattr(extractor, '__name__', ''))
177179
if extractor:
@@ -181,12 +183,12 @@ def extract_file(location, target, kinds=extractcode.default_kinds):
181183
# if there is an error, the extracted files will not be moved
182184
# to target
183185
tmp_tgt = fileutils.get_temp_dir('extract')
184-
abs_location= abspath(expanduser(location))
186+
abs_location = abspath(expanduser(location))
185187
warnings.extend(extractor(abs_location, tmp_tgt))
186188
fileutils.copytree(tmp_tgt, target)
187189
fileutils.delete(tmp_tgt)
188190
except Exception, e:
189-
if DEBUG:
191+
if TRACE:
190192
logger.debug('extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' % locals())
191193
errors = [str(e).strip(' \'"')]
192194
finally:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
date: 2016-12-03
2+
download_url: https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/xar/xar-1.4.xar
154 KB
Binary file not shown.

tests/extractcode/test_archive.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2015 nexB Inc. and others. All rights reserved.
2+
# Copyright (c) 2016 nexB Inc. and others. All rights reserved.
33
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
44
# The ScanCode software is licensed under the Apache License version 2.0.
55
# Data generated with ScanCode require an acknowledgment.
@@ -91,6 +91,7 @@ def test_get_extractors(self):
9191
('archive/7z/z.7z', [archive.extract_7z]),
9292
('archive/Z/tr2tex.Z', [archive.extract_Z, ]),
9393
('archive/Z/tkWWW-0.11.tar.Z', [archive.extract_Z, archive.extract_tar]),
94+
('archive/xar/xar-1.4.xar', [archive.extract_xarpkg]),
9495
]
9596

9697
for test_file, expected in test_data:
@@ -1863,3 +1864,14 @@ def test_extract_z_compress_basic(self):
18631864
archive.extract_Z(test_file, test_dir)
18641865
result = os.path.join(test_dir, 'tr2tex')
18651866
assert os.path.exists(result)
1867+
1868+
1869+
class TestXar(BaseArchiveTestCase):
1870+
def test_extract_xar_basic(self):
1871+
test_file = self.get_test_loc('archive/xar/xar-1.4.xar')
1872+
test_dir = self.get_temp_dir()
1873+
archive.extract_xarpkg(test_file, test_dir)
1874+
result = os.path.join(test_dir, '[TOC].xml')
1875+
assert os.path.exists(result)
1876+
result = os.path.join(test_dir, 'xar-1.4', 'Makefile.in')
1877+
assert os.path.exists(result)

0 commit comments

Comments
 (0)