Skip to content

Commit 66d6d01

Browse files
authored
Merge pull request #46 from nexB/whiteouts
Properly support whiteouts in container tarballs
2 parents 3061713 + 7ad595b commit 66d6d01

File tree

8 files changed

+133
-31
lines changed

8 files changed

+133
-31
lines changed

CHANGELOG.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
Changelog
22
=========
33

4+
v32.0.1
5+
--------
6+
7+
This is a minor release with bug fixes.
8+
9+
- We now correctly process the opaque whiteouts seen in container image layer
10+
tarballs.
11+
12+
Thank you to AJ Arena @sig-aarena
13+
14+
415
v32.0.0
516
--------
617

src/container_inspector/rootfs.py

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,14 @@ def rebuild_rootfs(img, target_dir, skip_symlinks=True):
108108
return deletions
109109

110110

111-
WHITEOUT_EXPLICIT_PREFIX = '.wh.'
112-
WHITEOUT_OPAQUE_PREFIX = '.wh..wh.opq'
111+
WHITEOUT_PREFIX = '.wh.'
112+
WHITEOUT_SPECIAL_PREFIX = '.wh..wh'
113+
WHITEOUT_OPAQUE_PREFIX = '.wh..wh..opq'
113114

114115

115-
def is_whiteout_marker(path):
116+
def is_whiteout_marker(file_name):
116117
"""
117-
Return True if the ``path`` is a whiteout marker file.
118+
Return True if the ``file_name`` is a whiteout marker file.
118119
119120
For example::
120121
>>> is_whiteout_marker('.wh.somepath')
@@ -123,32 +124,80 @@ def is_whiteout_marker(path):
123124
True
124125
>>> is_whiteout_marker('somepath.wh.')
125126
False
126-
>>> is_whiteout_marker('somepath/.wh.foo')
127+
>>> is_whiteout_marker('.wh.foo')
127128
True
128-
>>> is_whiteout_marker('somepath/.wh.foo/')
129+
"""
130+
return file_name and file_name.startswith(WHITEOUT_PREFIX)
131+
132+
133+
def is_whiteout_opaque_marker(file_name):
134+
"""
135+
Return True if the ``file_name`` is an opaque whiteout marker file.
136+
137+
For example::
138+
>>> is_whiteout_opaque_marker('.wh.somepath')
139+
False
140+
>>> is_whiteout_opaque_marker('.wh..wh.opq')
141+
False
142+
>>> is_whiteout_opaque_marker('.wh..wh..opq')
129143
True
144+
>>> is_whiteout_opaque_marker('somepath..wh..wh..opq')
145+
False
146+
>>> is_whiteout_opaque_marker('.wh..wh.plnk')
147+
False
148+
>>> is_whiteout_opaque_marker('.wh..wh..opq.foo')
149+
False
150+
>>> is_whiteout_opaque_marker('somepath/.wh..wh..opq/')
151+
False
152+
"""
153+
return file_name and file_name == WHITEOUT_OPAQUE_PREFIX
154+
155+
156+
def is_whiteout_special_marker(file_name):
157+
"""
158+
Return True if the ``file_name`` is a special whiteout marker file, i.e. any file name that starts with ``.wh..wh``.
159+
160+
For example::
161+
>>> is_whiteout_special_marker('.wh.somepath')
162+
False
163+
>>> is_whiteout_special_marker('.wh..wh.opq')
164+
True
165+
>>> is_whiteout_special_marker('.wh..wh..opq')
166+
True
167+
>>> is_whiteout_special_marker('.wh..wh.plnk')
168+
True
169+
>>> is_whiteout_special_marker('somepath..wh..wh..opq')
170+
False
171+
>>> is_whiteout_special_marker('.wh..wh..opq.foo')
172+
True
173+
>>> is_whiteout_special_marker('somepath/.wh..wh..opq/')
174+
False
130175
"""
131-
file_name = path and os.path.basename(path.strip('/')) or ''
132-
return file_name.startswith(WHITEOUT_EXPLICIT_PREFIX)
176+
return file_name and file_name.startswith(WHITEOUT_SPECIAL_PREFIX)
133177

134178

135179
def get_whiteable_path(path):
136180
"""
137181
Return the whiteable path for ``path`` or None if this is not a whiteable path.
138-
TODO: Handle OSses with case-insensitive FS (e.g. Windows)
139182
"""
183+
# FIXME: Handle OSes with case-insensitive FS (e.g. Windows)
140184
file_name = os.path.basename(path)
141185
parent_dir = os.path.dirname(path)
142186

143-
if file_name == WHITEOUT_OPAQUE_PREFIX:
187+
if is_whiteout_special_marker(file_name):
144188
# An opaque whiteout means the whole parent directory should be removed
145189
# https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts
190+
# Note: as a simplification, we treat all of these as opaque and log those that are not.
191+
if not is_whiteout_opaque_marker(file_name):
192+
# This is the case for legacy AUFS '.wh..wh.plnk' and '.wh..wh.aufs'
193+
# only seen in legacy Docker
194+
logger.error(f'ERROR: unsupported whiteout filename: {file_name}')
146195
return parent_dir
147196

148-
if file_name.startswith(WHITEOUT_EXPLICIT_PREFIX):
197+
elif is_whiteout_marker(file_name):
149198
# Explicit, file-only whiteout
150199
# https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts
151-
_, _, real_file_name = file_name.rpartition(WHITEOUT_EXPLICIT_PREFIX)
200+
_, _, real_file_name = file_name.rpartition(WHITEOUT_PREFIX)
152201
return os.path.join(parent_dir, real_file_name)
153202

154203

src/container_inspector/utils.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,23 @@ def to_string(self):
103103
return f"{self.type}: {self.message}"
104104

105105

106+
def is_relative_path(path):
107+
"""
108+
Return True if ``path`` is a relative path, i.e. one of its ``/``-separated segments is ``..``.
109+
>>> is_relative_path('.wh..wh..opq')
110+
False
111+
>>> is_relative_path('.wh/../wh..opq')
112+
True
113+
>>> is_relative_path('..foor')
114+
False
115+
>>> is_relative_path('../foor')
116+
True
117+
>>> is_relative_path('.//.foor//..')
118+
True
119+
"""
120+
return any(name == '..' for name in path.split('/'))
121+
122+
106123
def extract_tar(location, target_dir, as_events=False, skip_symlinks=True, trace=TRACE):
107124
"""
108125
Extract a tar archive at ``location`` in the ``target_dir`` directory.
@@ -133,7 +150,7 @@ def extract_tar(location, target_dir, as_events=False, skip_symlinks=True, trace
133150
logger.debug(f'extract_tar: {msg}')
134151
continue
135152

136-
if '..' in tarinfo.name:
153+
if is_relative_path(tarinfo.name):
137154
msg = f'{location}: skipping unsupported {tarinfo.name} with relative path.'
138155
events.append(ExtractEvent(type=ExtractEvent.WARNING, source=tarinfo.name, message=msg))
139156
if trace:
File renamed without changes.
File renamed without changes.
5.5 KB
Binary file not shown.
File renamed without changes.

tests/test_utils.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ def clean_events(self, extract_dir, events):
5454
source=e.source.replace(extract_dir, ''),
5555
message=e.message.replace(self.test_data_dir, ''),
5656
)
57-
events_results.append(ne._asdict())
58-
59-
return events_results
57+
events_results.append(ne)
58+
events_results = sorted(events_results, key=lambda x: x.source)
59+
return [dict(ne._asdict()) for ne in events_results]
6060

6161
def clean_paths(self, extract_dir):
6262
return sorted([p.replace(extract_dir, '') for p in
@@ -70,45 +70,70 @@ def test_extract_tree_with_colon_in_filenames(self):
7070
expected = (
7171
'colon/libc6:amd64.list',
7272
)
73-
test_dir = self.get_test_loc('tar/colon.tar.xz')
73+
test_dir = self.get_test_loc('utils/colon.tar.xz')
7474
extract_dir = self.get_temp_dir()
7575
events = utils.extract_tar(location=test_dir, target_dir=extract_dir)
7676
check_files(target_dir=extract_dir, expected=expected)
7777
assert not events
7878

7979
def test_extract_tar_relative(self):
8080
expected = ()
81-
test_dir = self.get_test_loc('tar/tar_relative.tar')
81+
test_dir = self.get_test_loc('utils/tar_relative.tar')
8282
extract_dir = self.get_temp_dir()
8383
events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True)
8484
check_files(target_dir=extract_dir, expected=expected)
8585
events = self.clean_events(extract_dir, events)
8686
expected_events = [
87-
{'message': '/tar/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.',
88-
'source': '../a_parent_folder.txt',
89-
'type': 'warning'},
90-
{'message': '/tar/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.',
87+
{'message': '/utils/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.',
9188
'source': '../../another_folder/b_two_root.txt',
9289
'type': 'warning'},
93-
{'message': '/tar/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.',
90+
{'message': '/utils/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.',
91+
'source': '../a_parent_folder.txt',
92+
'type': 'warning'},
93+
{'message': '/utils/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.',
9494
'source': '../folder/subfolder/b_subfolder.txt',
9595
'type': 'warning'},
9696
]
9797

9898
assert events == expected_events
9999

100+
def test_extract_tar_relative_with_whiteouts(self):
101+
expected = (
102+
'.wh..wh..opq',
103+
'.wh..wh..plnk',
104+
'.wh.foo.txt'
105+
)
106+
test_dir = self.get_test_loc('utils/tar_relative-with-whiteouts.tar')
107+
extract_dir = self.get_temp_dir()
108+
events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True)
109+
check_files(target_dir=extract_dir, expected=expected)
110+
events = self.clean_events(extract_dir, events)
111+
expected_events = [
112+
{'message': '/utils/tar_relative-with-whiteouts.tar: skipping unsupported ../../another_folder/.wh..wh..opq with relative path.',
113+
'source': '../../another_folder/.wh..wh..opq',
114+
'type': 'warning'},
115+
{'message': '/utils/tar_relative-with-whiteouts.tar: skipping unsupported ../.wh..wh..opq with relative path.',
116+
'source': '../.wh..wh..opq',
117+
'type': 'warning'},
118+
{'message': '/utils/tar_relative-with-whiteouts.tar: skipping unsupported ../folder/subfolder/.wh..wh..opq with relative path.',
119+
'source': '../folder/subfolder/.wh..wh..opq',
120+
'type': 'warning'},
121+
]
122+
123+
assert events == expected_events
124+
100125
def test_extract_tar_relative_as_strings(self):
101126
expected = ()
102-
test_dir = self.get_test_loc('tar/tar_relative.tar')
127+
test_dir = self.get_test_loc('utils/tar_relative.tar')
103128
extract_dir = self.get_temp_dir()
104129
events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=False)
105130
check_files(target_dir=extract_dir, expected=expected)
106131

107132
events = [e.replace(self.test_data_dir, '') for e in events]
108133
expected_events = [
109-
'warning: /tar/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.',
110-
'warning: /tar/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.',
111-
'warning: /tar/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.',
134+
'warning: /utils/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.',
135+
'warning: /utils/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.',
136+
'warning: /utils/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.',
112137
]
113138
assert events == expected_events
114139

@@ -117,20 +142,20 @@ def test_extract_tar_absolute(self):
117142
'tmp/subdir/a.txt',
118143
'tmp/subdir/b.txt',
119144
)
120-
test_dir = self.get_test_loc('tar/absolute_path.tar')
145+
test_dir = self.get_test_loc('utils/absolute_path.tar')
121146
extract_dir = self.get_temp_dir()
122147
events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True)
123148
check_files(target_dir=extract_dir, expected=expected)
124149

125150
events = self.clean_events(extract_dir, events)
126151
expected_events = [
127-
{'message': '/tar/absolute_path.tar: absolute path name: /tmp/subdir transformed in relative path.',
152+
{'message': '/utils/absolute_path.tar: absolute path name: /tmp/subdir transformed in relative path.',
128153
'source': '/tmp/subdir',
129154
'type': 'warning'},
130-
{'message': '/tar/absolute_path.tar: absolute path name: /tmp/subdir/a.txt transformed in relative path.',
155+
{'message': '/utils/absolute_path.tar: absolute path name: /tmp/subdir/a.txt transformed in relative path.',
131156
'source': '/tmp/subdir/a.txt',
132157
'type': 'warning'},
133-
{'message': '/tar/absolute_path.tar: absolute path name: /tmp/subdir/b.txt transformed in relative path.',
158+
{'message': '/utils/absolute_path.tar: absolute path name: /tmp/subdir/b.txt transformed in relative path.',
134159
'source': '/tmp/subdir/b.txt',
135160
'type': 'warning'},
136161
]

0 commit comments

Comments
 (0)