Skip to content

Commit cf09c22

Browse files
authored
Merge pull request #1059 from GitGuardian/severine/fix-paths-exclusion-in-docker
fix: handle ignored_paths in docker scan
2 parents 5734a9d + ecfbdb5 commit cf09c22

File tree

6 files changed

+93
-28
lines changed

6 files changed

+93
-28
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Fixed
2+
3+
- `ggshield secret scan docker` now correctly handles ignored paths (#548).

ggshield/cmd/secret/scan/docker.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def docker_name_cmd(
6161
cache=ctx_obj.cache,
6262
secret_config=config.user_config.secret,
6363
scan_context=scan_context,
64+
exclusion_regexes=ctx_obj.exclusion_regexes,
6465
)
6566

6667
return output_handler.process_scan(scan)

ggshield/cmd/secret/scan/dockerarchive.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def docker_archive_cmd(
4848
cache=ctx_obj.cache,
4949
secret_config=config.user_config.secret,
5050
scan_context=scan_context,
51+
exclusion_regexes=ctx_obj.exclusion_regexes,
5152
)
5253

5354
return output_handler.process_scan(scan)

ggshield/verticals/secret/docker.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,20 @@
1+
from __future__ import annotations
2+
13
import json
24
import re
35
import subprocess
46
import tarfile
57
from contextlib import contextmanager
68
from dataclasses import dataclass
79
from pathlib import Path
8-
from typing import Any, Dict, Generator, Iterable, List
10+
from typing import TYPE_CHECKING
911

1012
from click import UsageError
11-
from pygitguardian import GGClient
1213

1314
from ggshield.core import ui
14-
from ggshield.core.cache import Cache
15-
from ggshield.core.config.user_config import SecretConfig
1615
from ggshield.core.dirs import get_cache_dir
1716
from ggshield.core.errors import UnexpectedError
18-
from ggshield.core.scan import ScanContext, Scannable, StringScannable
17+
from ggshield.core.scan import Scannable, StringScannable
1918
from ggshield.core.scan.id_cache import IDCache
2019
from ggshield.core.scanner_ui import create_scanner_ui
2120
from ggshield.utils.files import is_path_binary
@@ -24,6 +23,15 @@
2423
from .secret_scanner import SecretScanner
2524

2625

26+
if TYPE_CHECKING:
27+
from typing import Any, Dict, Generator, Iterable, List, Set
28+
29+
from pygitguardian import GGClient
30+
31+
from ggshield.core.cache import Cache
32+
from ggshield.core.config.user_config import SecretConfig
33+
from ggshield.core.scan import ScanContext
34+
2735
FILEPATH_BANLIST = [
2836
r"^/?usr/(?!share/nginx)",
2937
r"^/?lib/",
@@ -42,6 +50,7 @@
4250
FILEPATH_BANLIST_PATTERNS = {
4351
re.compile(banned_filepath) for banned_filepath in FILEPATH_BANLIST
4452
}
53+
# Note that FILEPATH_BANLIST_PATTERNS comes in addition to what's done in _exclude_callback
4554

4655
LAYER_TO_SCAN_PATTERN = re.compile(r"\b(copy|add)\b", re.IGNORECASE)
4756

@@ -225,7 +234,9 @@ def _load_layer_infos(self) -> None:
225234
]
226235
self.layer_infos = [x for x in layer_infos if x.should_scan()]
227236

228-
def get_layer_scannables(self, layer_info: LayerInfo) -> Iterable[Scannable]:
237+
def get_layer_scannables(
238+
self, layer_info: LayerInfo, exclusion_regexes: Set[re.Pattern[str]]
239+
) -> Iterable[Scannable]:
229240
"""
230241
Extracts Scannable to be scanned for given layer.
231242
"""
@@ -244,17 +255,21 @@ def get_layer_scannables(self, layer_info: LayerInfo) -> Iterable[Scannable]:
244255
if file_info.size == 0:
245256
continue
246257

247-
if not _validate_filepath(filepath=file_info.path):
258+
if not _validate_filepath(
259+
filepath=file_info.path, exclusion_regexes=exclusion_regexes
260+
):
248261
continue
249262

250263
yield DockerContentScannable(layer_id, layer_archive, file_info)
251264

252265

253266
def _validate_filepath(
254267
filepath: str,
268+
exclusion_regexes: Set[re.Pattern[str]],
255269
) -> bool:
256270
if any(
257-
banned_pattern.search(filepath) for banned_pattern in FILEPATH_BANLIST_PATTERNS
271+
banned_pattern.search(filepath)
272+
for banned_pattern in FILEPATH_BANLIST_PATTERNS | exclusion_regexes
258273
):
259274
return False
260275

@@ -328,6 +343,7 @@ def docker_scan_archive(
328343
cache: Cache,
329344
secret_config: SecretConfig,
330345
scan_context: ScanContext,
346+
exclusion_regexes: Set[re.Pattern[str]],
331347
) -> SecretScanCollection:
332348
scanner = SecretScanner(
333349
client=client,
@@ -347,7 +363,7 @@ def docker_scan_archive(
347363
)
348364

349365
for info in docker_image.layer_infos:
350-
files = list(docker_image.get_layer_scannables(info))
366+
files = list(docker_image.get_layer_scannables(info, exclusion_regexes))
351367
file_count = len(files)
352368
if file_count == 0:
353369
continue

tests/unit/cmd/scan/test_docker.py

Lines changed: 62 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import re
23
import sys
34
from pathlib import Path
45
from unittest.mock import Mock, patch
@@ -7,7 +8,11 @@
78
import pytest
89

910
from ggshield.__main__ import cli
11+
from ggshield.cmd.secret.scan.secret_scan_common_options import (
12+
IGNORED_DEFAULT_WILDCARDS,
13+
)
1014
from ggshield.core.errors import ExitCode
15+
from ggshield.core.filter import init_exclusion_regexes
1116
from ggshield.core.scan import StringScannable
1217
from ggshield.verticals.secret import SecretScanCollection
1318
from ggshield.verticals.secret.docker import DockerImage, LayerInfo, _validate_filepath
@@ -18,30 +23,37 @@
1823
assert_invoke_exited_with,
1924
assert_invoke_ok,
2025
my_vcr,
26+
write_text,
2127
)
2228

2329

2430
class TestDockerUtils:
2531
@pytest.mark.parametrize(
26-
"filepath, valid",
32+
"filepath, exclusion_regexes, valid",
2733
(
28-
["/usr/bin/secret.py", False],
29-
["usr/bin/secret.py", False],
30-
["/my/file/secret.py", True],
31-
["/my/file/usr/bin/secret.py", True],
32-
["/usr/share/nginx/secret.py", True],
33-
["/gems/secret.py", True],
34-
["/npm-bis/secret.py", True],
35-
["/banned/extension/secret.exe", False],
36-
["/banned/extension/secret.mng", False],
37-
["/banned/extension/secret.tar", False],
38-
["/banned/extension/secret.other", True],
34+
["/usr/bin/secret.py", set(), False],
35+
["usr/bin/secret.py", set(), False],
36+
["/my/file/secret.py", set(), True],
37+
["/my/file/usr/bin/secret.py", set(), True],
38+
["/usr/share/nginx/secret.py", set(), True],
39+
["/gems/secret.py", set(), True],
40+
["/npm-bis/secret.py", set(), True],
41+
["/banned/extension/secret.exe", set(), False],
42+
["/banned/extension/secret.mng", set(), False],
43+
["/banned/extension/secret.tar", set(), False],
44+
["/banned/extension/secret.other", set(), True],
45+
[
46+
"/banned/extension/secret.other",
47+
{re.compile(r"secret")},
48+
False,
49+
],
3950
),
4051
)
41-
def test_docker_filepath_validation(self, filepath, valid):
52+
def test_docker_filepath_validation(self, filepath, exclusion_regexes, valid):
4253
assert (
4354
_validate_filepath(
4455
filepath=filepath,
56+
exclusion_regexes=exclusion_regexes,
4557
)
4658
is valid
4759
)
@@ -108,12 +120,14 @@ def test_docker_scan_failed_to_save(
108120
"image_path", [DOCKER_EXAMPLE_PATH, DOCKER__INCOMPLETE_MANIFEST_EXAMPLE_PATH]
109121
)
110122
@pytest.mark.parametrize("json_output", (False, True))
123+
@pytest.mark.parametrize("ignore_secret_file", (True, False))
111124
def test_docker_scan_archive(
112125
self,
113126
docker_image_open_mock: Mock,
114127
cli_fs_runner: click.testing.CliRunner,
115128
image_path: Path,
116129
json_output: bool,
130+
ignore_secret_file: bool,
117131
):
118132
assert image_path.exists()
119133

@@ -131,7 +145,9 @@ def create_docker_image() -> Mock(spec=DockerImage):
131145
scannable = StringScannable(
132146
content=UNCHECKED_SECRET_PATCH, url="file_secret"
133147
)
134-
docker_image.get_layer_scannables.return_value = [scannable]
148+
docker_image.get_layer_scannables.return_value = (
149+
[scannable] if not ignore_secret_file else []
150+
)
135151

136152
return docker_image
137153

@@ -142,6 +158,15 @@ def create_docker_image() -> Mock(spec=DockerImage):
142158
with my_vcr.use_cassette("test_scan_file_secret"):
143159
json_arg = ["--json"] if json_output else []
144160
cli_fs_runner.mix_stderr = False
161+
if ignore_secret_file:
162+
config = """
163+
version: 2
164+
secret:
165+
ignored_paths:
166+
- "file_secret"
167+
168+
"""
169+
write_text(Path(".gitguardian.yaml"), config)
145170
result = cli_fs_runner.invoke(
146171
cli,
147172
[
@@ -153,12 +178,31 @@ def create_docker_image() -> Mock(spec=DockerImage):
153178
str(image_path),
154179
],
155180
)
156-
assert_invoke_exited_with(result, ExitCode.SCAN_FOUND_PROBLEMS)
181+
assert_invoke_exited_with(
182+
result,
183+
(
184+
ExitCode.SCAN_FOUND_PROBLEMS
185+
if not ignore_secret_file
186+
else ExitCode.SUCCESS
187+
),
188+
)
157189
docker_image_open_mock.assert_called_once_with(image_path)
158-
docker_image.get_layer_scannables.assert_called_once_with(layer_info)
190+
docker_image.get_layer_scannables.assert_called_once_with(
191+
layer_info,
192+
init_exclusion_regexes(
193+
IGNORED_DEFAULT_WILDCARDS
194+
+ (["file_secret"] if ignore_secret_file else [])
195+
),
196+
)
159197

160198
if json_output:
161199
output = json.loads(result.output)
162-
assert len(output["entities_with_incidents"]) == 1
200+
if not ignore_secret_file:
201+
assert len(output["entities_with_incidents"]) == 1
202+
else:
203+
assert output["total_incidents"] == 0
163204
else:
164-
assert "file_secret: 1 secret detected" in result.output
205+
if not ignore_secret_file:
206+
assert "file_secret: 1 secret detected" in result.output
207+
else:
208+
assert "No secrets have been found" in result.output

tests/unit/verticals/secret/test_scan_docker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def test_docker_archive(self, image_path: Path):
9494

9595
# Fill layer_ids and layer_files
9696
for info in image.layer_infos:
97-
if files := list(image.get_layer_scannables(info)):
97+
if files := list(image.get_layer_scannables(info, set())):
9898
layer_ids.append(info.diff_id)
9999
layer_files.append(files)
100100

0 commit comments

Comments
 (0)