Skip to content

Commit 3061713

Browse files
authored
Merge pull request #45 from nexB/extract-symlinks
Extract symlinks correctly
2 parents b4c76b9 + 050c05b commit 3061713

22 files changed

+462
-121
lines changed

CHANGELOG.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,24 @@
11
Changelog
22
=========
33

4+
v32.0.0
5+
--------
6+
7+
This is a minor release with bug fixes and an output change.
8+
9+
- We no longer support Python 3.6, only 3.7 and up.
10+
11+
- "utils.extract_tar" function now behaves correctly with links and returns
12+
either a list of error message strings (the previous default) but with updated
13+
messages or a list of ExtractEvent to better track extraction errors and warnings.
14+
The behavior is driven by the "as_events" argument.
15+
16+
- In all places where extract is callable (Image, Layer) there is a new
17+
"skip_symlinks" argument defaulting to True. If True, we skip symlinks and links.
18+
The same applies to the "as_events" argument available in these places as these
19+
functions now return a list (rather than nothing before).
20+
21+
422
v31.1.0
523
--------
624

azure-pipelines.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,23 @@ jobs:
1111
parameters:
1212
job_name: ubuntu18_cpython
1313
image_name: ubuntu-18.04
14-
python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10']
14+
python_versions: ['3.7', '3.8', '3.9', '3.10']
1515
test_suites:
1616
all: venv/bin/pytest -n 2 -vvs
1717

1818
- template: etc/ci/azure-posix.yml
1919
parameters:
2020
job_name: ubuntu20_cpython
2121
image_name: ubuntu-20.04
22-
python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10']
22+
python_versions: ['3.7', '3.8', '3.9', '3.10']
2323
test_suites:
2424
all: venv/bin/pytest -n 2 -vvs
2525

2626
- template: etc/ci/azure-posix.yml
2727
parameters:
2828
job_name: macos1015_cpython
2929
image_name: macos-10.15
30-
python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10']
30+
python_versions: ['3.7', '3.8', '3.9', '3.10']
3131
test_suites:
3232
all: venv/bin/pytest -n 2 -vvs
3333

@@ -43,7 +43,7 @@ jobs:
4343
# parameters:
4444
# job_name: win2019_cpython
4545
# image_name: windows-2019
46-
# python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10']
46+
# python_versions: ['3.7', '3.8', '3.9', '3.10']
4747
# test_suites:
4848
# all: venv\Scripts\pytest -n 2 -vvs
4949
#

requirements-dev.txt

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,31 @@
1-
aboutcode-toolkit==7.0.1
2-
bleach==4.1.0
1+
aboutcode-toolkit==7.0.2
2+
black==22.6.0
3+
bleach==5.0.1
34
build==0.7.0
45
commonmark==0.9.1
5-
docutils==0.18.1
6+
docutils==0.19
67
et-xmlfile==1.1.0
78
execnet==1.9.0
89
iniconfig==1.1.1
9-
jeepney==0.7.1
10-
keyring==23.4.1
11-
openpyxl==3.0.9
10+
isort==5.10.1
11+
jeepney==0.8.0
12+
keyring==23.7.0
13+
mypy-extensions==0.4.3
14+
openpyxl==3.0.10
15+
pathspec==0.9.0
1216
pep517==0.12.0
13-
pkginfo==1.8.2
17+
pkginfo==1.8.3
18+
platformdirs==2.5.2
1419
py==1.11.0
15-
pytest==7.0.1
20+
pytest==7.1.2
1621
pytest-forked==1.4.0
1722
pytest-xdist==2.5.0
18-
readme-renderer==34.0
23+
readme-renderer==35.0
1924
requests-toolbelt==0.9.1
20-
rfc3986==1.5.0
21-
rich==12.3.0
25+
rfc3986==2.0.0
26+
rich==12.5.1
2227
secretstorage==3.3.2
23-
tomli==1.2.3
24-
twine==3.8.0
28+
tomli==2.0.1
29+
tqdm==4.64.0
30+
twine==4.0.1
31+
typing_extensions==4.3.0

requirements.txt

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,78 +2,78 @@ attrs==21.4.0
22
banal==1.0.6
33
beautifulsoup4==4.11.1
44
binaryornot==0.4.4
5-
boolean.py==3.8
6-
certifi==2021.10.8
7-
cffi==1.15.0
8-
chardet==4.0.0
9-
charset-normalizer==2.0.12
10-
click==8.0.4
11-
colorama==0.4.4
12-
commoncode==30.2.0
5+
boolean.py==4.0
6+
certifi==2022.6.15
7+
cffi==1.15.1
8+
chardet==5.0.0
9+
charset-normalizer==2.1.0
10+
click==8.1.3
11+
colorama==0.4.5
12+
commoncode==31.0.0b4
1313
construct==2.10.68
14-
cryptography==36.0.2
15-
debian-inspector==30.0.0
14+
cryptography==37.0.4
15+
debian-inspector==31.0.0b1
1616
dockerfile-parse==1.2.0
1717
dparse2==0.6.1
18-
extractcode==30.0.0
18+
extractcode==31.0.0
1919
extractcode-7z==16.5.210531
2020
extractcode-libarchive==3.5.1.210531
2121
fasteners==0.17.3
2222
fingerprints==1.0.3
23-
ftfy==6.0.3
23+
ftfy==6.1.1
2424
future==0.18.2
2525
gemfileparser==0.8.0
2626
html5lib==1.1
2727
idna==3.3
28-
importlib-metadata==4.8.3
28+
importlib-metadata==4.12.0
2929
inflection==0.5.1
3030
intbitset==3.0.1
3131
isodate==0.6.1
32-
jaraco.functools==3.4.0
32+
jaraco.functools==3.5.1
3333
javaproperties==0.8.1
34-
Jinja2==3.0.3
34+
Jinja2==3.1.2
3535
jsonstreams==0.6.0
36-
license-expression==21.6.14
37-
lxml==4.8.0
38-
MarkupSafe==2.0.1
36+
libfwsi-python==20220123
37+
license-expression==30.0.0
38+
lxml==4.9.1
39+
MarkupSafe==2.1.1
3940
more-itertools==8.13.0
4041
normality==2.3.3
4142
packagedcode-msitools==0.101.210706
42-
packageurl-python==0.9.9
43+
packageurl-python==0.10.0
4344
packaging==21.3
4445
parameter-expansion-patched==0.3.1
45-
patch==1.16
46-
pdfminer.six==20220506
47-
pefile==2021.9.3
46+
pdfminer.six==20220524
47+
pefile==2022.5.30
4848
pip-requirements-parser==31.2.0
4949
pkginfo2==30.0.0
5050
pluggy==1.0.0
51-
plugincode==21.1.21
51+
plugincode==31.0.0b1
5252
ply==3.11
5353
publicsuffix2==2.20191221
5454
pyahocorasick==2.0.0b1
5555
pycparser==2.21
5656
pygmars==0.7.0
5757
Pygments==2.12.0
5858
pymaven-patch==0.3.0
59-
pyparsing==3.0.8
59+
pyparsing==3.0.9
6060
pytz==2022.1
6161
PyYAML==6.0
62-
rdflib==5.0.0
63-
regipy==2.2.2
64-
requests==2.27.1
62+
rdflib==6.2.0
63+
regipy==3.0.2
64+
requests==2.28.1
6565
rpm-inspector-rpm==4.16.1.3.210404
6666
saneyaml==0.5.2
6767
six==1.16.0
68-
soupsieve==2.3.1
68+
soupsieve==2.3.2.post1
6969
spdx-tools==0.7.0a3
7070
text-unidecode==1.3
7171
toml==0.10.2
72-
typecode==21.6.1
72+
typecode==30.0.0
7373
typecode-libmagic==5.39.210531
74-
urllib3==1.26.9
74+
urllib3==1.26.11
7575
urlpy==0.5
7676
wcwidth==0.2.5
7777
webencodings==0.5.1
78-
xmltodict==0.12.0
79-
zipp==3.6.0
78+
xmltodict==0.13.0
79+
zipp==3.8.1

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ zip_safe = false
5454

5555
setup_requires = setuptools_scm[toml] >= 4
5656

57-
python_requires = >=3.6.*
57+
python_requires = >=3.7.*
5858

5959
install_requires =
6060
click >= 6.7, !=7.0, !=8.0.3

src/container_inspector/image.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -356,14 +356,23 @@ def bottom_layer(self):
356356
"""
357357
return self.layers[0]
358358

359-
def extract_layers(self, extracted_location):
359+
def extract_layers(self, extracted_location, as_events=False, skip_symlinks=True):
360360
"""
361361
Extract all layer archives to the `extracted_location` directory.
362362
Each layer is extracted to its own directory named after its `layer_id`.
363+
Skip symlinks and links if ``skip_symlinks`` is True.
364+
Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise.
363365
"""
366+
all_events = []
364367
for layer in self.layers:
365368
exloc = os.path.join(extracted_location, layer.layer_id)
366-
layer.extract(extracted_location=exloc)
369+
events = layer.extract(
370+
extracted_location=exloc,
371+
skip_symlinks=skip_symlinks,
372+
as_events=as_events,
373+
)
374+
all_events.extend(events)
375+
return events
367376

368377
def get_layers_resources(self, with_dir=False):
369378
"""
@@ -450,41 +459,53 @@ def get_installed_packages(self, packages_getter):
450459
yield purl, package, layer
451460

452461
@staticmethod
453-
def extract(archive_location, extracted_location, skip_symlinks=False):
462+
def extract(archive_location, extracted_location, as_events=False, skip_symlinks=False):
454463
"""
455464
Extract the image archive tarball at ``archive_location`` to
456-
``extracted_location``. Skip symlinks and links if ``skip_symlinks`` is True.
465+
``extracted_location``.
466+
Skip symlinks and links if ``skip_symlinks`` is True.
467+
Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise.
457468
"""
458-
utils.extract_tar(
469+
return utils.extract_tar(
459470
location=archive_location,
460471
target_dir=extracted_location,
461472
skip_symlinks=skip_symlinks,
473+
as_events=as_events,
462474
)
463475

464476
@staticmethod
465477
def get_images_from_tarball(
466478
archive_location,
467479
extracted_location,
468480
verify=True,
481+
skip_symlinks=False,
469482
):
470483
"""
471-
Return a list of Images found in the tarball at `archive_location` that
472-
will be extracted to `extracted_location`. The tarball must be in the
484+
Return a list of Images found in the tarball at ``archive_location`` that
485+
will be extracted to ``extracted_location``. The tarball must be in the
473486
format of a "docker save" command tarball.
474487
475-
If `verify` is True, perform extra checks on the config data and layers
488+
If ``verify`` is True, perform extra checks on the config data and layers
476489
checksums.
490+
Skip symlinks and links if ``skip_symlinks`` is True.
491+
Ignore the extract events from extraction.
477492
"""
478493
if TRACE:
479494
logger.debug(
480-
f'get_images_from_tarball: {archive_location} , '
495+
f'get_images_from_tarball: {archive_location} '
481496
f'extracting to: {extracted_location}'
482497
)
483498

484-
Image.extract(
499+
# TODO: do not ignore extract events
500+
_events = Image.extract(
485501
archive_location=archive_location,
486502
extracted_location=extracted_location,
503+
skip_symlinks=skip_symlinks,
487504
)
505+
if TRACE:
506+
logger.debug(f'get_images_from_tarball: events')
507+
for e in _events:
508+
logger.debug(str(e))
488509

489510
return Image.get_images_from_dir(
490511
extracted_location=extracted_location,
@@ -1071,16 +1092,19 @@ def __attrs_post_init__(self, *args, **kwargs):
10711092
if not self.size:
10721093
self.size = os.path.getsize(self.archive_location)
10731094

1074-
def extract(self, extracted_location, skip_symlinks=True):
1095+
def extract(self, extracted_location, as_events=False, skip_symlinks=False):
10751096
"""
10761097
Extract this layer archive in the `extracted_location` directory and set
10771098
this Layer ``extracted_location`` attribute to ``extracted_location``.
1099+
Skip symlinks and links if ``skip_symlinks`` is True.
1100+
Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise.
10781101
"""
10791102
self.extracted_location = extracted_location
1080-
utils.extract_tar(
1103+
return utils.extract_tar(
10811104
location=self.archive_location,
10821105
target_dir=extracted_location,
10831106
skip_symlinks=skip_symlinks,
1107+
as_events=as_events,
10841108
)
10851109

10861110
def get_resources(self, with_dir=False, walker=os.walk):

src/container_inspector/rootfs.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,16 @@ class InconsistentLayersError(Exception):
3131
pass
3232

3333

34-
def rebuild_rootfs(img, target_dir):
34+
def rebuild_rootfs(img, target_dir, skip_symlinks=True):
3535
"""
3636
Extract and merge or "squash" all layers of the `image` Image in a single
3737
rootfs in `target_dir`. Extraction is done in sequence from the bottom (root
3838
or initial) layer to the top (or latest) layer and the "whiteouts"
3939
unionfs/overlayfs procedure is applied at each step as per the OCI spec:
4040
https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts
4141
42+
Skip symlinks and links if ``skip_symlinks`` is True.
43+
4244
Return a list of deleted "whiteout" files.
4345
Raise an Exception on errors.
4446
@@ -73,8 +75,15 @@ def rebuild_rootfs(img, target_dir):
7375
# 1. extract a layer to temp.
7476
# Note that we are not preserving any special file and any file permission
7577
extracted_loc = tempfile.mkdtemp('container_inspector-docker')
76-
layer.extract(extracted_location=extracted_loc)
77-
if TRACE: logger.debug(f' Extracted layer to: {extracted_loc}')
78+
# TODO: do not ignore extract events
79+
_events = layer.extract(
80+
extracted_location=extracted_loc,
81+
skip_symlinks=skip_symlinks,
82+
)
83+
if TRACE:
84+
logger.debug(f' Extracted layer to: {extracted_loc} with skip_symlinks: {skip_symlinks}')
85+
for ev in _events:
86+
logger.debug(f' {ev}')
7887

7988
# 2. find whiteouts in that layer.
8089
whiteouts = list(find_whiteouts(extracted_loc))

0 commit comments

Comments
 (0)