Skip to content

Commit c4c7fe4

Browse files
committed
CM-53929 - implement .cycodeignore files filtering for repo and commit range scans
1 parent 53d3d64 commit c4c7fe4

File tree

6 files changed

+567
-2
lines changed

6 files changed

+567
-2
lines changed

cycode/cli/apps/scan/commit_range_scanner.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
)
3232
from cycode.cli.files_collector.file_excluder import excluder
3333
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
34+
from cycode.cli.files_collector.documents_walk_ignore import filter_documents_with_cycodeignore
3435
from cycode.cli.files_collector.sca.sca_file_collector import (
3536
perform_sca_pre_commit_range_scan_actions,
3637
perform_sca_pre_hook_range_scan_actions,
@@ -188,6 +189,9 @@ def _scan_sca_commit_range(ctx: typer.Context, repo_path: str, commit_range: str
188189
)
189190
from_commit_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SCA_SCAN_TYPE, from_commit_documents)
190191
to_commit_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SCA_SCAN_TYPE, to_commit_documents)
192+
193+
from_commit_documents = filter_documents_with_cycodeignore(from_commit_documents, repo_path)
194+
to_commit_documents = filter_documents_with_cycodeignore(to_commit_documents, repo_path)
191195

192196
perform_sca_pre_commit_range_scan_actions(
193197
repo_path, from_commit_documents, from_commit_rev, to_commit_documents, to_commit_rev
@@ -203,6 +207,8 @@ def _scan_secret_commit_range(
203207
diff_documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(
204208
consts.SECRET_SCAN_TYPE, commit_diff_documents_to_scan
205209
)
210+
211+
diff_documents_to_scan = filter_documents_with_cycodeignore(diff_documents_to_scan, repo_path)
206212

207213
scan_documents(
208214
ctx, diff_documents_to_scan, get_scan_parameters(ctx, (repo_path,)), is_git_diff=True, is_commit_range=True
@@ -221,8 +227,12 @@ def _scan_sast_commit_range(ctx: typer.Context, repo_path: str, commit_range: st
221227
to_commit_rev,
222228
reverse_diff=False,
223229
)
230+
224231
commit_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SAST_SCAN_TYPE, commit_documents)
225232
diff_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SAST_SCAN_TYPE, diff_documents)
233+
234+
commit_documents = filter_documents_with_cycodeignore(commit_documents, repo_path)
235+
diff_documents = filter_documents_with_cycodeignore(diff_documents, repo_path)
226236

227237
_scan_commit_range_documents(ctx, commit_documents, diff_documents, scan_parameters=scan_parameters)
228238

@@ -254,10 +264,14 @@ def _scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None:
254264
progress_bar_section=ScanProgressBarSection.PREPARE_LOCAL_FILES,
255265
repo_path=repo_path,
256266
)
267+
257268
git_head_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SCA_SCAN_TYPE, git_head_documents)
258269
pre_committed_documents = excluder.exclude_irrelevant_documents_to_scan(
259270
consts.SCA_SCAN_TYPE, pre_committed_documents
260271
)
272+
273+
git_head_documents = filter_documents_with_cycodeignore(git_head_documents, repo_path)
274+
pre_committed_documents = filter_documents_with_cycodeignore(pre_committed_documents, repo_path)
261275

262276
perform_sca_pre_hook_range_scan_actions(repo_path, git_head_documents, pre_committed_documents)
263277

@@ -288,7 +302,10 @@ def _scan_secret_pre_commit(ctx: typer.Context, repo_path: str) -> None:
288302
is_git_diff_format=True,
289303
)
290304
)
305+
291306
documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(consts.SECRET_SCAN_TYPE, documents_to_scan)
307+
308+
documents_to_scan = filter_documents_with_cycodeignore(documents_to_scan, repo_path)
292309

293310
scan_documents(ctx, documents_to_scan, get_scan_parameters(ctx), is_git_diff=True)
294311

@@ -301,10 +318,14 @@ def _scan_sast_pre_commit(ctx: typer.Context, repo_path: str, **_) -> None:
301318
progress_bar_section=ScanProgressBarSection.PREPARE_LOCAL_FILES,
302319
repo_path=repo_path,
303320
)
321+
304322
pre_committed_documents = excluder.exclude_irrelevant_documents_to_scan(
305323
consts.SAST_SCAN_TYPE, pre_committed_documents
306324
)
307325
diff_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SAST_SCAN_TYPE, diff_documents)
326+
327+
pre_committed_documents = filter_documents_with_cycodeignore(pre_committed_documents, repo_path)
328+
diff_documents = filter_documents_with_cycodeignore(diff_documents, repo_path)
308329

309330
_scan_commit_range_documents(ctx, pre_committed_documents, diff_documents, scan_parameters=scan_parameters)
310331

cycode/cli/apps/scan/repository/repository_command.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from cycode.cli.files_collector.file_excluder import excluder
1212
from cycode.cli.files_collector.repository_documents import get_git_repository_tree_file_entries
1313
from cycode.cli.files_collector.sca.sca_file_collector import add_sca_dependencies_tree_documents_if_needed
14+
from cycode.cli.files_collector.documents_walk_ignore import filter_documents_with_cycodeignore
1415
from cycode.cli.logger import logger
1516
from cycode.cli.models import Document
1617
from cycode.cli.utils.path_utils import get_path_by_os
@@ -59,6 +60,8 @@ def repository_command(
5960
)
6061

6162
documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)
63+
64+
documents_to_scan = filter_documents_with_cycodeignore(documents_to_scan, str(path))
6265

6366
add_sca_dependencies_tree_documents_if_needed(ctx, scan_type, documents_to_scan)
6467

cycode/cli/consts.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
IAC_SCAN_SUPPORTED_FILE_EXTENSIONS = ('.tf', '.tf.json', '.json', '.yaml', '.yml', '.dockerfile', '.containerfile')
1818
IAC_SCAN_SUPPORTED_FILE_PREFIXES = ('dockerfile', 'containerfile')
1919

20+
CYCODEIGNORE_FILENAME = '.cycodeignore'
21+
2022
SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE = (
2123
'.DS_Store',
2224
'.bmp',
cycode/cli/files_collector/documents_walk_ignore.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import os
2+
from typing import TYPE_CHECKING
3+
4+
from cycode.cli import consts
5+
from cycode.cli.logger import get_logger
6+
from cycode.cli.utils.ignore_utils import IgnoreFilterManager
7+
8+
if TYPE_CHECKING:
9+
from cycode.cli.models import Document
10+
11+
logger = get_logger('Documents Ignores')
12+
13+
14+
def _get_cycodeignore_path(repo_path: str) -> str:
    """Return the location of the root-level ignore file for ``repo_path``.

    The file name is taken from ``consts.CYCODEIGNORE_FILENAME``. The path is
    only composed here; whether the file exists is not checked.
    """
    ignore_file_name = consts.CYCODEIGNORE_FILENAME
    return os.path.join(repo_path, ignore_file_name)
17+
18+
19+
def _create_ignore_filter_manager(repo_path: str, cycodeignore_path: str) -> IgnoreFilterManager:
    """Build an ``IgnoreFilterManager`` rooted at ``repo_path``.

    The given ``.cycodeignore`` file is passed as the only global ignore file
    and no additional global patterns are supplied.
    """
    build_options = {
        'path': repo_path,
        'global_ignore_file_paths': [cycodeignore_path],
        'global_patterns': [],
    }
    return IgnoreFilterManager.build(**build_options)
26+
27+
28+
def _log_ignored_files(repo_path: str, dirpath: str, ignored_dirnames: list[str], ignored_filenames: list[str]) -> None:
29+
"""Log ignored files for debugging (similar to walk_ignore function)."""
30+
rel_dirpath = '' if dirpath == repo_path else os.path.relpath(dirpath, repo_path)
31+
display_dir = rel_dirpath or '.'
32+
33+
for is_dir, names in (
34+
(True, ignored_dirnames),
35+
(False, ignored_filenames),
36+
):
37+
for name in names:
38+
full_path = os.path.join(repo_path, display_dir, name)
39+
if is_dir:
40+
full_path = os.path.join(full_path, '*')
41+
logger.debug('Ignoring match %s', full_path)
42+
43+
44+
def _build_allowed_paths_set(ignore_filter_manager: IgnoreFilterManager, repo_path: str) -> set[str]:
    """Collect the paths of the files yielded by ``walk_with_ignored()``.

    Args:
        ignore_filter_manager: Manager whose ``walk_with_ignored()`` yields
            ``(dirpath, dirnames, filenames, ignored_dirnames, ignored_filenames)``.
        repo_path: Repository root; only used to format the debug log lines.

    Returns:
        Set of ``os.path.normpath``-normalized file paths that were not ignored.
    """
    allowed_paths: set[str] = set()

    for dirpath, _dirnames, filenames, ignored_dirnames, ignored_filenames in ignore_filter_manager.walk_with_ignored():
        _log_ignored_files(repo_path, dirpath, ignored_dirnames, ignored_filenames)

        # Documents are looked up by their normpath()-ed path, so the entries must be
        # normalized the same way; otherwise a root such as './repo' or 'repo/' yields
        # paths that can never match and every document would be filtered out.
        allowed_paths.update(os.path.normpath(os.path.join(dirpath, filename)) for filename in filenames)

    return allowed_paths
56+
57+
58+
def _get_document_check_path(document: 'Document', repo_path: str) -> str:
59+
"""Get the normalized absolute path for a document to check against allowed paths."""
60+
check_path = document.absolute_path
61+
if not check_path:
62+
if os.path.isabs(document.path):
63+
check_path = document.path
64+
else:
65+
check_path = os.path.join(repo_path, document.path)
66+
67+
return os.path.normpath(check_path)
68+
69+
70+
def _filter_documents_by_allowed_paths(
71+
documents: list['Document'],
72+
allowed_paths: set[str],
73+
repo_path: str
74+
) -> list['Document']:
75+
"""Filter documents by checking if their paths are in the allowed set."""
76+
filtered_documents = []
77+
78+
for document in documents:
79+
try:
80+
check_path = _get_document_check_path(document, repo_path)
81+
82+
if check_path in allowed_paths:
83+
filtered_documents.append(document)
84+
else:
85+
relative_path = os.path.relpath(check_path, repo_path)
86+
logger.debug('Filtered out document due to .cycodeignore: %s', relative_path)
87+
except Exception as e:
88+
logger.debug('Error processing document %s: %s', document.path, e)
89+
# Include document if we can't determine if it should be ignored
90+
filtered_documents.append(document)
91+
92+
return filtered_documents
93+
94+
95+
def filter_documents_with_cycodeignore(documents: list['Document'], repo_path: str) -> list['Document']:
    """Filter documents based on .cycodeignore patterns.

    This function uses the .cycodeignore file in the repository root to filter
    out documents whose paths match any of its patterns.

    Args:
        documents: List of Document objects to filter
        repo_path: Path to the repository root

    Returns:
        List of Document objects that don't match any .cycodeignore patterns.
        The input list itself is returned when it is empty or when the
        repository root has no .cycodeignore file.
    """
    if not documents:
        # Nothing to filter: skip building the ignore manager and collecting paths
        return documents

    cycodeignore_path = _get_cycodeignore_path(repo_path)

    # isfile() rather than exists(): a directory named '.cycodeignore' is not a pattern file
    if not os.path.isfile(cycodeignore_path):
        return documents

    ignore_filter_manager = _create_ignore_filter_manager(repo_path, cycodeignore_path)

    # NOTE(review): the allowed set comes from walk_with_ignored(), which presumably walks
    # the working tree. If so, documents taken from other commits that no longer exist on
    # disk are dropped even when no pattern matches them -- confirm this is intended.
    allowed_paths = _build_allowed_paths_set(ignore_filter_manager, repo_path)

    filtered_documents = _filter_documents_by_allowed_paths(documents, allowed_paths, repo_path)

    logger.debug('Filtered %d documents using .cycodeignore patterns', len(documents) - len(filtered_documents))
    return filtered_documents

cycode/cli/files_collector/walk_ignore.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
import os
22
from collections.abc import Generator, Iterable
3+
from typing import TYPE_CHECKING
34

5+
from cycode.cli import consts
46
from cycode.cli.logger import get_logger
57
from cycode.cli.utils.ignore_utils import IgnoreFilterManager
68

79
logger = get_logger('Ignores')
810

911
_SUPPORTED_IGNORE_PATTERN_FILES = {
1012
'.gitignore',
11-
'.cycodeignore',
13+
consts.CYCODEIGNORE_FILENAME,
1214
}
1315
_DEFAULT_GLOBAL_IGNORE_PATTERNS = [
1416
'.git',
@@ -56,4 +58,4 @@ def walk_ignore(path: str) -> Generator[tuple[str, list[str], list[str]], None,
5658
full_path = os.path.join(full_path, '*')
5759
logger.debug('Ignoring match %s', full_path)
5860

59-
yield dirpath, dirnames, filenames
61+
yield dirpath, dirnames, filenames

0 commit comments

Comments
 (0)