|
| 1 | +import os |
| 2 | +from typing import TYPE_CHECKING |
| 3 | + |
| 4 | +from cycode.cli import consts |
| 5 | +from cycode.cli.logger import get_logger |
| 6 | +from cycode.cli.utils.ignore_utils import IgnoreFilterManager |
| 7 | + |
| 8 | +if TYPE_CHECKING: |
| 9 | + from cycode.cli.models import Document |
| 10 | + |
| 11 | +logger = get_logger('Documents Ignores') |
| 12 | + |
| 13 | + |
def _get_cycodeignore_path(repo_path: str) -> str:
    """Return the location of the .cycodeignore file directly under ``repo_path``.

    The path is only composed here; nothing checks that the file exists.
    """
    ignore_filename = consts.CYCODEIGNORE_FILENAME
    return os.path.join(repo_path, ignore_filename)
| 17 | + |
| 18 | + |
def _create_ignore_filter_manager(repo_path: str, cycodeignore_path: str) -> IgnoreFilterManager:
    """Build an IgnoreFilterManager for ``repo_path`` driven by a single ignore file.

    ``cycodeignore_path`` is passed as the only global ignore file, and no
    extra global patterns are supplied.
    """
    build_options = {
        'path': repo_path,
        'global_ignore_file_paths': [cycodeignore_path],
        'global_patterns': [],
    }
    return IgnoreFilterManager.build(**build_options)
| 26 | + |
| 27 | + |
def _log_ignored_files(repo_path: str, dirpath: str, ignored_dirnames: list[str], ignored_filenames: list[str]) -> None:
    """Emit one debug line per ignored entry found in ``dirpath``.

    Directories are reported first, each with a trailing ``*`` component,
    followed by the ignored files. The original note describes this output as
    similar to the ``walk_ignore`` function.
    """
    # Directory shown in the message: relative to the repo root, '.' for the root itself.
    if dirpath == repo_path:
        shown_dir = '.'
    else:
        shown_dir = os.path.relpath(dirpath, repo_path) or '.'

    for dirname in ignored_dirnames:
        logger.debug('Ignoring match %s', os.path.join(repo_path, shown_dir, dirname, '*'))

    for filename in ignored_filenames:
        logger.debug('Ignoring match %s', os.path.join(repo_path, shown_dir, filename))
| 42 | + |
| 43 | + |
def _build_allowed_paths_set(ignore_filter_manager: IgnoreFilterManager, repo_path: str) -> set[str]:
    """Collect the normalized paths of the files yielded by the ignore-aware walk.

    Args:
        ignore_filter_manager: Manager whose ``walk_with_ignored()`` yields
            ``(dirpath, dirnames, filenames, ignored_dirnames, ignored_filenames)``.
        repo_path: Repository root, used only when logging ignored entries.

    Returns:
        Set of ``os.path.normpath``-normalized file paths built from each
        yielded ``dirpath`` and its ``filenames`` (treated here as the files
        that were not ignored).
    """
    allowed_paths: set[str] = set()

    walk = ignore_filter_manager.walk_with_ignored()
    for dirpath, _dirnames, filenames, ignored_dirnames, ignored_filenames in walk:
        _log_ignored_files(repo_path, dirpath, ignored_dirnames, ignored_filenames)

        # Lookups against this set use os.path.normpath-ed document paths, so
        # store the same normalized form; otherwise a root such as './repo' or
        # '/a/../repo' would never match and every document would be dropped.
        allowed_paths.update(os.path.normpath(os.path.join(dirpath, filename)) for filename in filenames)

    return allowed_paths
| 56 | + |
| 57 | + |
| 58 | +def _get_document_check_path(document: 'Document', repo_path: str) -> str: |
| 59 | + """Get the normalized absolute path for a document to check against allowed paths.""" |
| 60 | + check_path = document.absolute_path |
| 61 | + if not check_path: |
| 62 | + if os.path.isabs(document.path): |
| 63 | + check_path = document.path |
| 64 | + else: |
| 65 | + check_path = os.path.join(repo_path, document.path) |
| 66 | + |
| 67 | + return os.path.normpath(check_path) |
| 68 | + |
| 69 | + |
def _filter_documents_by_allowed_paths(
    documents: list['Document'],
    allowed_paths: set[str],
    repo_path: str,
) -> list['Document']:
    """Keep only the documents whose resolved path is in ``allowed_paths``.

    Args:
        documents: Documents to filter.
        allowed_paths: Paths that are allowed to pass through.
        repo_path: Repository root used to resolve relative document paths and
            to shorten paths in debug messages.

    Returns:
        The documents that are allowed, in their original order. A document
        whose path cannot be resolved is kept (best effort) rather than dropped.
    """
    filtered_documents = []

    for document in documents:
        # Only path resolution is best-effort: if it fails we cannot tell
        # whether the document is ignored, so it is kept.
        try:
            check_path = _get_document_check_path(document, repo_path)
        except Exception as e:
            logger.debug('Error processing document %s: %s', document.path, e)
            filtered_documents.append(document)
            continue

        if check_path in allowed_paths:
            filtered_documents.append(document)
            continue

        # The relative path is for the log message only. os.path.relpath can
        # raise ValueError (e.g. different drives on Windows); that must not
        # flip the decision and re-include a document that was filtered out.
        try:
            display_path = os.path.relpath(check_path, repo_path)
        except ValueError:
            display_path = check_path
        logger.debug('Filtered out document due to .cycodeignore: %s', display_path)

    return filtered_documents
| 93 | + |
| 94 | + |
def filter_documents_with_cycodeignore(documents: list['Document'], repo_path: str) -> list['Document']:
    """Drop documents that the repository's .cycodeignore file excludes.

    The .cycodeignore file is looked up in the repository root. When it does
    not exist, the input list is returned unchanged.

    Args:
        documents: List of Document objects to filter
        repo_path: Path to the repository root

    Returns:
        List of Document objects that remain after applying the
        .cycodeignore-based filtering
    """
    ignore_file = _get_cycodeignore_path(repo_path)
    if os.path.exists(ignore_file):
        manager = _create_ignore_filter_manager(repo_path, ignore_file)
        permitted_paths = _build_allowed_paths_set(manager, repo_path)
        kept_documents = _filter_documents_by_allowed_paths(documents, permitted_paths, repo_path)

        removed_count = len(documents) - len(kept_documents)
        logger.debug('Filtered %d documents using .cycodeignore patterns', removed_count)
        return kept_documents

    # No ignore file: nothing to filter.
    return documents
0 commit comments