Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ jobs:
run: poetry install

- name: Run Tests
run: poetry run pytest
run: poetry run python -m pytest
4 changes: 2 additions & 2 deletions .github/workflows/tests_full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
uses: actions/cache@v3
with:
path: ~/.local
key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-1 # increment to reset cache
key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-2 # increment to reset cache

- name: Setup Poetry
if: steps.cached-poetry.outputs.cache-hit != 'true'
Expand All @@ -71,4 +71,4 @@ jobs:
./dist/cycode-cli version

- name: Run pytest
run: poetry run pytest
run: poetry run python -m pytest
25 changes: 12 additions & 13 deletions cycode/cli/files_collector/path_documents.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from typing import TYPE_CHECKING, Iterable, List, Tuple
from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple

import pathspec

Expand All @@ -10,6 +10,7 @@
is_iac,
is_tfplan_file,
)
from cycode.cli.files_collector.walk_ignore import walk_ignore
from cycode.cli.models import Document
from cycode.cli.utils.path_utils import get_absolute_path, get_file_content
from cycode.cyclient import logger
Expand All @@ -18,17 +19,18 @@
from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection


def _get_all_existing_files_in_directory(path: str) -> List[str]:
def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns: bool = True) -> List[str]:
files: List[str] = []

for root, _, filenames in os.walk(path):
walk_func = walk_ignore if walk_with_ignore_patterns else os.walk
for root, _, filenames in walk_func(path):
for filename in filenames:
files.append(os.path.join(root, filename))

return files


def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> List[str]:
def _get_relevant_files_in_path(path: str, exclude_patterns: Optional[Iterable[str]] = None) -> List[str]:
absolute_path = get_absolute_path(path)

if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path):
Expand All @@ -37,24 +39,21 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> L
if os.path.isfile(absolute_path):
return [absolute_path]

all_file_paths = set(_get_all_existing_files_in_directory(absolute_path))
file_paths = _get_all_existing_files_in_directory(absolute_path)

path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns)
excluded_file_paths = set(path_spec.match_files(all_file_paths))
if exclude_patterns:
path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns)
file_paths = path_spec.match_files(file_paths, negate=True)

relevant_file_paths = all_file_paths - excluded_file_paths

return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)]
return [file_path for file_path in file_paths if os.path.isfile(file_path)]


def _get_relevant_files(
progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, paths: Tuple[str]
) -> List[str]:
all_files_to_scan = []
for path in paths:
all_files_to_scan.extend(
_get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**'])
)
all_files_to_scan.extend(_get_relevant_files_in_path(path))

# we double the progress bar section length because we are going to process the files twice
# first time to get the file list with respect to excluded patterns (excluding takes seconds to execute)
Expand Down
78 changes: 78 additions & 0 deletions cycode/cli/files_collector/walk_ignore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import os
from collections import defaultdict
from typing import Generator, Iterable, List, Tuple

import pathspec
from pathspec.util import StrPath

from cycode.cli.utils.path_utils import get_file_content
from cycode.cyclient import logger

# File names that are treated as ignore files: their lines are read as gitwildmatch patterns.
_SUPPORTED_IGNORE_PATTERN_FILES = {'.gitignore', '.cycodeignore'}
# Patterns that are always applied, before anything read from ignore files.
_DEFAULT_GLOBAL_IGNORE_PATTERNS = [
    '**/.git',
    '**/.cycode',
]


def _walk_to_top(path: str) -> Iterable[str]:
    """Yield ``path`` followed by each of its ancestors, nearest first.

    Iteration stops once ``os.path.dirname`` no longer changes the value. That
    final value (for example the filesystem root) is yielded as well unless it
    is an empty string, which is what a relative path collapses to.
    """
    current = path
    while True:
        parent = os.path.dirname(current)
        if parent == current:
            break
        yield current
        current = parent

    # the fixed point itself is the top-level directory; skip it only when empty
    if current:
        yield current


def _collect_top_level_ignore_files(path: str) -> List[str]:
    """Find the supported ignore files located in ``path`` and in every ancestor directory of it.

    The result is ordered from the top-most directory down to ``path``. Callers concatenate the
    patterns of these files in order, and with gitignore-style matching the last matching pattern
    wins, so this ordering lets a rule from a nearer directory (e.g. a ``!negation``) override a
    rule from a directory further up.

    :param path: directory to start from; its ancestors are derived with ``_walk_to_top``.
    :return: paths of the existing ignore files, top-most directory first.
    """
    ignore_files = []

    # _walk_to_top() yields from `path` upwards; reverse it to visit the top-most directory first
    for dir_path in reversed(list(_walk_to_top(path))):
        # sorted(): the supported names are kept in a set, whose iteration order is not stable between runs
        for ignore_file in sorted(_SUPPORTED_IGNORE_PATTERN_FILES):
            ignore_file_path = os.path.join(dir_path, ignore_file)
            # isfile() instead of exists(): a directory named like an ignore file cannot be read as one
            if os.path.isfile(ignore_file_path):
                logger.debug('Apply top level ignore file: %s', ignore_file_path)
                ignore_files.append(ignore_file_path)

    return ignore_files


def _get_global_ignore_patterns(path: str) -> List[str]:
    """Build the pattern list that applies to the whole walk started at ``path``.

    Starts from a copy of the built-in default patterns and appends, line by line, the content
    of every ignore file reported by ``_collect_top_level_ignore_files``.
    """
    patterns = list(_DEFAULT_GLOBAL_IGNORE_PATTERNS)  # copy, the module-level default must stay untouched

    for ignore_file_path in _collect_top_level_ignore_files(path):
        content = get_file_content(ignore_file_path)
        patterns += content.splitlines()

    return patterns


def _should_include_path(ignore_patterns: List[str], path: StrPath) -> bool:
    """Return ``True`` when ``path`` is not matched by the given gitwildmatch patterns.

    The same check is used for files and for directories.
    """
    spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ignore_patterns)
    is_ignored = spec.match_file(path)
    return not is_ignored


def walk_ignore(path: str) -> Generator[Tuple[str, List[str], List[str]], None, None]:
    """Walk ``path`` like ``os.walk`` while honouring ignore files.

    For every visited directory the effective pattern list is, in this order:

    1. the global patterns (built-in defaults plus ignore files found in ``path`` and above it);
    2. the patterns accumulated from ignore files of the directories between ``path`` and the
       current directory;
    3. the patterns of ignore files located in the current directory itself.

    Directories and files matched by that list are removed from the yielded lists; removed
    directories are not descended into. Patterns are matched against ``os.path.join(dirpath, name)``.

    :param path: directory to walk.
    :return: generator of ``(dirpath, dirnames, filenames)`` tuples with ignored entries removed.
    """
    global_ignore_patterns = _get_global_ignore_patterns(path)

    # directory path (exactly as os.walk() will report it) -> patterns accumulated from its ancestors
    inherited_patterns_by_dir = {}

    for dirpath, dirnames, filenames in os.walk(path, topdown=True):
        # every directory inherits from its parent, whether or not it has an ignore file of its own;
        # copy the list because it is shared between sibling directories
        local_patterns = list(inherited_patterns_by_dir.pop(dirpath, ()))
        if local_patterns:
            logger.debug(
                'Inherit ignore patterns: %s', {'inherit_from': os.path.dirname(dirpath), 'inherit_to': dirpath}
            )

        # find and process ignore files first to get the patterns of the current directory
        for filename in filenames:
            if filename in _SUPPORTED_IGNORE_PATTERN_FILES:
                filepath = os.path.join(dirpath, filename)
                logger.debug('Apply ignore file: %s', filepath)
                local_patterns.extend(get_file_content(filepath).splitlines())

        # compile the patterns once per directory instead of once per entry
        path_spec = pathspec.PathSpec.from_lines(
            pathspec.patterns.GitWildMatchPattern, global_ignore_patterns + local_patterns
        )

        # with topdown=True os.walk() only descends into what is left in `dirnames`;
        # slice assignment ([:]) is mandatory to change the lists in-place!
        dirnames[:] = [d for d in dirnames if not path_spec.match_file(os.path.join(dirpath, d))]
        filenames[:] = [f for f in filenames if not path_spec.match_file(os.path.join(dirpath, f))]

        # os.walk() reports a sub-directory as join(dirpath, name), so the same key is used here;
        # this keeps the lookup correct for roots given as '.' or with a trailing separator
        for dirname in dirnames:
            inherited_patterns_by_dir[os.path.join(dirpath, dirname)] = local_patterns

        yield dirpath, dirnames, filenames
69 changes: 55 additions & 14 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pytest = ">=7.3.1,<7.4.0"
pytest-mock = ">=3.10.0,<3.11.0"
coverage = ">=7.2.3,<7.3.0"
responses = ">=0.23.1,<0.24.0"
pyfakefs = ">=5.7.2,<5.8.0"

[tool.poetry.group.executable.dependencies]
pyinstaller = {version=">=5.13.2,<5.14.0", python=">=3.8,<3.13"}
Expand Down
Empty file.
Loading
Loading