Skip to content

Commit 050c05b

Browse files
committed
When extracting, return a list of strings
* Use the new "as_events" arg to get a list of ExtractEvents. But this is no longer the default. Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent c7fecc2 commit 050c05b

File tree

5 files changed

+68
-89
lines changed

5 files changed

+68
-89
lines changed

CHANGELOG.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,16 @@ v32.0.0
77
This is a minor release with bug fixes and an output change.
88

99
- We no longer support Python 3.6, only 3.7 and up.
10+
1011
- "utils.extract_tar" function now behaves correctly with links and returns
11-
ExtractEvent to track extraction errors and warnings.
12-
This replaces the simpler list of error messages.
12+
either a list of error message strings (the previous default, but with updated
13+
messages) or a list of ExtractEvent to better track extraction errors and warnings.
14+
The behavior is driven by the "as_events" argument.
15+
1316
- In all places where extract is callable (Image, Layer) there is a new
14-
skip_symlinks argument defaulting to True. If True, we skip symlinks and links.
17+
"skip_symlinks" argument defaulting to True. If True, we skip symlinks and links.
18+
The same applies to the "as_events" argument available in these places, as these
19+
functions now return a list (rather than nothing before).
1520

1621

1722
v31.1.0

src/container_inspector/image.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -356,19 +356,20 @@ def bottom_layer(self):
356356
"""
357357
return self.layers[0]
358358

359-
def extract_layers(self, extracted_location, skip_symlinks=True):
359+
def extract_layers(self, extracted_location, as_events=False, skip_symlinks=True):
360360
"""
361361
Extract all layer archives to the `extracted_location` directory.
362362
Each layer is extracted to its own directory named after its `layer_id`.
363363
Skip symlinks and links if ``skip_symlinks`` is True.
364-
Return a list of ExtractEvent.
364+
Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise.
365365
"""
366366
all_events = []
367367
for layer in self.layers:
368368
exloc = os.path.join(extracted_location, layer.layer_id)
369369
events = layer.extract(
370370
extracted_location=exloc,
371371
skip_symlinks=skip_symlinks,
372+
as_events=as_events,
372373
)
373374
all_events.extend(events)
374375
return events
@@ -458,17 +459,18 @@ def get_installed_packages(self, packages_getter):
458459
yield purl, package, layer
459460

460461
@staticmethod
461-
def extract(archive_location, extracted_location, skip_symlinks=False):
462+
def extract(archive_location, extracted_location, as_events=False, skip_symlinks=False):
462463
"""
463464
Extract the image archive tarball at ``archive_location`` to
464465
``extracted_location``.
465466
Skip symlinks and links if ``skip_symlinks`` is True.
466-
Return a list of ExtractEvent.
467+
Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise.
467468
"""
468469
return utils.extract_tar(
469470
location=archive_location,
470471
target_dir=extracted_location,
471472
skip_symlinks=skip_symlinks,
473+
as_events=as_events,
472474
)
473475

474476
@staticmethod
@@ -500,6 +502,10 @@ def get_images_from_tarball(
500502
extracted_location=extracted_location,
501503
skip_symlinks=skip_symlinks,
502504
)
505+
if TRACE:
506+
logger.debug(f'get_images_from_tarball: events')
507+
for e in _events:
508+
logger.debug(str(e))
503509

504510
return Image.get_images_from_dir(
505511
extracted_location=extracted_location,
@@ -1086,18 +1092,19 @@ def __attrs_post_init__(self, *args, **kwargs):
10861092
if not self.size:
10871093
self.size = os.path.getsize(self.archive_location)
10881094

1089-
def extract(self, extracted_location, skip_symlinks=False):
1095+
def extract(self, extracted_location, as_events=False, skip_symlinks=False):
10901096
"""
10911097
Extract this layer archive in the `extracted_location` directory and set
10921098
this Layer ``extracted_location`` attribute to ``extracted_location``.
10931099
Skip symlinks and links if ``skip_symlinks`` is True.
1094-
Return a list of ExtractEvent.
1100+
Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise.
10951101
"""
10961102
self.extracted_location = extracted_location
10971103
return utils.extract_tar(
10981104
location=self.archive_location,
10991105
target_dir=extracted_location,
11001106
skip_symlinks=skip_symlinks,
1107+
as_events=as_events,
11011108
)
11021109

11031110
def get_resources(self, with_dir=False, walker=os.walk):

src/container_inspector/rootfs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,13 @@ def rebuild_rootfs(img, target_dir, skip_symlinks=True):
7676
# Note that we are not preserving any special file and any file permission
7777
extracted_loc = tempfile.mkdtemp('container_inspector-docker')
7878
# TODO: do not ignore extract events
79-
_extract_events = layer.extract(
79+
_events = layer.extract(
8080
extracted_location=extracted_loc,
8181
skip_symlinks=skip_symlinks,
8282
)
8383
if TRACE:
8484
logger.debug(f' Extracted layer to: {extracted_loc} with skip_symlinks: {skip_symlinks}')
85-
for ev in _extract_events:
85+
for ev in _events:
8686
logger.debug(f' {ev}')
8787

8888
# 2. find whiteouts in that layer.

src/container_inspector/utils.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,16 @@ class ExtractEvent(NamedTuple):
9999
# event message
100100
message: str
101101

102+
def to_string(self):
103+
return f"{self.type}: {self.message}"
102104

103-
def extract_tar(location, target_dir, skip_symlinks=True, trace=TRACE):
105+
106+
def extract_tar(location, target_dir, as_events=False, skip_symlinks=True, trace=TRACE):
104107
"""
105-
Extract a tar archive at ``location`` in the ``target_dir`` directory and
106-
return a list of ExtractEvent possibly empty.
107-
Skip symlinks and hardlinks if skip_symlinks is True.
108+
Extract a tar archive at ``location`` in the ``target_dir`` directory.
109+
Return a list of ExtractEvent if ``as_events`` is True, or a list of message
110+
strings otherwise. This list can be empty. Skip symlinks and hardlinks if
111+
skip_symlinks is True.
108112
109113
Ignore special device files.
110114
Do not preserve the permissions and owners.
@@ -159,12 +163,13 @@ def extract_tar(location, target_dir, skip_symlinks=True, trace=TRACE):
159163
events.append(ExtractEvent(type=ExtractEvent.ERROR, source=tarinfo.name, message=msg))
160164
if trace:
161165
logger.debug(f'extract_tar: {msg}')
162-
166+
if not as_events:
167+
events = [e.to_string() for e in events]
163168
return events
164169

165170

166-
def extract_tar_with_symlinks(location, target_dir):
167-
return extract_tar(location=location, target_dir=target_dir, skip_symlinks=False)
171+
def extract_tar_with_symlinks(location, target_dir, as_events=False):
172+
return extract_tar(location=location, target_dir=target_dir, as_events=as_events, skip_symlinks=False,)
168173

169174

170175
def lower_keys(mapping):

tests/test_utils.py

Lines changed: 33 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,10 @@ def clean_events(self, extract_dir, events):
5050
"""
5151
events_results = []
5252
for e in events:
53-
ne = e._replace(source=e.source.replace(extract_dir, ''),
54-
message=e.message.replace(self.test_data_dir, ''))
53+
ne = e._replace(
54+
source=e.source.replace(extract_dir, ''),
55+
message=e.message.replace(self.test_data_dir, ''),
56+
)
5557
events_results.append(ne._asdict())
5658

5759
return events_results
@@ -78,7 +80,7 @@ def test_extract_tar_relative(self):
7880
expected = ()
7981
test_dir = self.get_test_loc('tar/tar_relative.tar')
8082
extract_dir = self.get_temp_dir()
81-
events = utils.extract_tar(location=test_dir, target_dir=extract_dir)
83+
events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True)
8284
check_files(target_dir=extract_dir, expected=expected)
8385
events = self.clean_events(extract_dir, events)
8486
expected_events = [
@@ -95,14 +97,29 @@ def test_extract_tar_relative(self):
9597

9698
assert events == expected_events
9799

100+
def test_extract_tar_relative_as_strings(self):
101+
expected = ()
102+
test_dir = self.get_test_loc('tar/tar_relative.tar')
103+
extract_dir = self.get_temp_dir()
104+
events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=False)
105+
check_files(target_dir=extract_dir, expected=expected)
106+
107+
events = [e.replace(self.test_data_dir, '') for e in events]
108+
expected_events = [
109+
'warning: /tar/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.',
110+
'warning: /tar/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.',
111+
'warning: /tar/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.',
112+
]
113+
assert events == expected_events
114+
98115
def test_extract_tar_absolute(self):
99116
expected = (
100117
'tmp/subdir/a.txt',
101118
'tmp/subdir/b.txt',
102119
)
103120
test_dir = self.get_test_loc('tar/absolute_path.tar')
104121
extract_dir = self.get_temp_dir()
105-
events = utils.extract_tar(location=test_dir, target_dir=extract_dir)
122+
events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True)
106123
check_files(target_dir=extract_dir, expected=expected)
107124

108125
events = self.clean_events(extract_dir, events)
@@ -124,10 +141,9 @@ def test_extract_tar_not_skipping_links(self):
124141
test_tarball = self.get_test_loc('utils/layer_with_links.tar')
125142
extract_dir = self.get_temp_dir()
126143

127-
events = utils.extract_tar(location=test_tarball, target_dir=extract_dir, skip_symlinks=False)
144+
events = utils.extract_tar(location=test_tarball, target_dir=extract_dir, as_events=True, skip_symlinks=False)
128145

129146
results = self.clean_paths(extract_dir)
130-
131147
expected_results = self.get_test_loc('utils/layer_with_links.tar.expected.json', must_exist=False)
132148
check_expected(results, expected_results, regen=False)
133149

@@ -139,82 +155,41 @@ def test_extract_tar_skipping_links(self):
139155
test_tarball = self.get_test_loc('utils/layer_with_links.tar')
140156
extract_dir = self.get_temp_dir()
141157

142-
events = utils.extract_tar(location=test_tarball, target_dir=extract_dir, skip_symlinks=True)
143-
144-
results = sorted([p.replace(extract_dir, '')
145-
for p in fileutils.resource_iter(
146-
location=extract_dir,
147-
with_dirs=True,
148-
follow_symlinks=True,
149-
)
150-
])
158+
events = utils.extract_tar(location=test_tarball, target_dir=extract_dir, as_events=True, skip_symlinks=True)
151159

160+
results = self.clean_paths(extract_dir)
152161
expected_results = self.get_test_loc('utils/layer_with_links.tar.expected-skipping.json', must_exist=False)
153162
check_expected(results, expected_results, regen=False)
154163

155-
events_results = []
156-
for e in events:
157-
ne = e._replace(
158-
source=e.source.replace(extract_dir, ''),
159-
message=e.message.replace(self.test_data_dir, ''),
160-
)
161-
events_results.append(ne._asdict())
162-
164+
events_results = self.clean_events(extract_dir, events)
163165
expected_events = self.get_test_loc('utils/layer_with_links.tar.expected-events-skipping.json', must_exist=False)
164166
check_expected(events_results, expected_events, regen=False)
165167

166168
def test_extract_tar_with_symlinks(self):
167169
test_tarball = self.get_test_loc('utils/layer_with_links.tar')
168170
extract_dir = self.get_temp_dir()
169171

170-
events = utils.extract_tar_with_symlinks(location=test_tarball, target_dir=extract_dir)
171-
results = sorted([p.replace(extract_dir, '')
172-
for p in fileutils.resource_iter(
173-
location=extract_dir,
174-
with_dirs=True,
175-
follow_symlinks=True,
176-
)
177-
])
172+
events = utils.extract_tar_with_symlinks(location=test_tarball, as_events=True, target_dir=extract_dir)
178173

174+
results = self.clean_paths(extract_dir)
179175
expected_results = self.get_test_loc('utils/layer_with_links.tar.expected.json', must_exist=False)
180176
check_expected(results, expected_results, regen=False)
181177

182-
events_results = []
183-
for e in events:
184-
ne = e._replace(
185-
source=e.source.replace(extract_dir, ''),
186-
message=e.message.replace(self.test_data_dir, ''),
187-
)
188-
events_results.append(ne._asdict())
189-
178+
events_results = self.clean_events(extract_dir, events)
190179
expected_events = self.get_test_loc('utils/layer_with_links.tar.expected-events.json', must_exist=False)
191180
check_expected(events_results, expected_events, regen=False)
192181

193182
def test_extract_tar_with_broken_links_skipping_links(self):
194183
test_tarball = self.get_test_loc('utils/layer_with_links_missing_targets.tar')
195184
extract_dir = self.get_temp_dir()
196185

197-
events = utils.extract_tar(location=test_tarball, target_dir=extract_dir, skip_symlinks=True)
198-
199-
results = sorted([p.replace(extract_dir, '')
200-
for p in fileutils.resource_iter(
201-
location=extract_dir,
202-
with_dirs=True,
203-
follow_symlinks=True,
204-
)
205-
])
186+
events = utils.extract_tar(location=test_tarball, target_dir=extract_dir, as_events=True, skip_symlinks=True)
206187

188+
results = self.clean_paths(extract_dir)
207189
expected_results = self.get_test_loc('utils/layer_with_links_missing_targets.tar.expected.json', must_exist=False)
208190
check_expected(results, expected_results, regen=False)
209191

210-
events_results = []
211-
for e in events:
212-
ne = e._replace(
213-
source=e.source.replace(extract_dir, ''),
214-
message=e.message.replace(self.test_data_dir, ''),
215-
)
216-
events_results.append(ne._asdict())
217-
192+
events_results = self.clean_events(extract_dir, events)
218193
expected_events = self.get_test_loc('utils/layer_with_links_missing_targets.tar.expected-events.json', must_exist=False)
219194
check_expected(events_results, expected_events, regen=False)
220195

@@ -223,25 +198,12 @@ def test_extract_tar_with_symlinks_with_broken_links(self):
223198
extract_dir = self.get_temp_dir()
224199

225200
events = utils.extract_tar_with_symlinks(location=test_tarball, target_dir=extract_dir)
226-
results = sorted([p.replace(extract_dir, '')
227-
for p in fileutils.resource_iter(
228-
location=extract_dir,
229-
with_dirs=True,
230-
follow_symlinks=True,
231-
)
232-
])
233201

202+
results = self.clean_paths(extract_dir)
234203
expected_results = self.get_test_loc('utils/layer_with_links_missing_targets.tar.expected-broken.json', must_exist=False)
235204
check_expected(results, expected_results, regen=False)
236205

237-
events_results = []
238-
for e in events:
239-
ne = e._replace(
240-
source=e.source.replace(extract_dir, ''),
241-
message=e.message.replace(self.test_data_dir, ''),
242-
)
243-
events_results.append(ne._asdict())
244-
206+
events_results = self.clean_events(extract_dir, events)
245207
expected_events = self.get_test_loc('utils/layer_with_links_missing_targets.tar.expected-events-broken.json', must_exist=False)
246208
check_expected(events_results, expected_events, regen=False)
247209

0 commit comments

Comments
 (0)