1+ # src/treemapper/ignore.py
12import logging
23import os
34from pathlib import Path
def read_ignore_file(file_path: Path) -> List[str]:
    """Read the ignore patterns from the specified ignore file.

    Blank lines and comment lines (first non-blank character is ``#``) are
    skipped; every remaining line is returned with surrounding whitespace
    stripped.

    Args:
        file_path: Location of the ignore file.

    Returns:
        The patterns in file order. An empty list is returned when the path
        is not a regular file, or when the file cannot be read or decoded
        (a warning is logged in the latter two cases).
    """
    ignore_patterns: List[str] = []
    if not file_path.is_file():
        return ignore_patterns

    try:
        # "utf-8-sig" reads plain UTF-8 and additionally drops a leading BOM,
        # which would otherwise be glued onto the first pattern or hide a
        # leading "#" on the first line.
        with file_path.open("r", encoding="utf-8-sig") as f:
            stripped_lines = (line.strip() for line in f)
            # Test the stripped text so indented comments are skipped too.
            ignore_patterns = [
                line for line in stripped_lines
                if line and not line.startswith("#")
            ]
    except OSError as e:
        logging.warning(f"Could not read ignore file {file_path}: {e}")
    except UnicodeDecodeError as e:
        logging.warning(f"Could not decode ignore file {file_path} as UTF-8: {e}")
    else:
        # Only report the patterns when the whole file was read successfully.
        logging.info(f"Using ignore patterns from {file_path}")
        logging.debug(f"Read ignore patterns from {file_path}: {ignore_patterns}")
    return ignore_patterns
1926
2027
@@ -27,86 +34,143 @@ def load_pathspec(patterns: List[str], syntax='gitwildmatch') -> pathspec.PathSp
2734
def get_ignore_specs(
    root_dir: Path,
    custom_ignore_file: Path | None = None,
    no_default_ignores: bool = False,
    output_file: Path | None = None,
) -> Tuple[pathspec.PathSpec, Dict[Path, pathspec.PathSpec]]:
    """Get combined ignore specs and git ignore specs.

    Args:
        root_dir: Root of the tree being mapped.
        custom_ignore_file: Optional ignore file given by the user (``-i``).
        no_default_ignores: When true, default patterns and ``.gitignore``
            files are not used; only the custom patterns apply, plus the
            output file itself when it lies inside ``root_dir``.
        output_file: Optional path the result is written to.

    Returns:
        A ``(combined_spec, gitignore_specs)`` tuple: the spec built from the
        combined pattern list, and the mapping returned by
        ``get_gitignore_specs``.
    """
    default_patterns = get_default_patterns(root_dir, no_default_ignores, output_file)
    custom_patterns = get_custom_patterns(root_dir, custom_ignore_file)

    if no_default_ignores:
        # Work on a copy: appending to the list returned by
        # get_custom_patterns would also change what is logged below as the
        # "custom" patterns.
        combined_patterns = list(custom_patterns)
        if output_file:
            try:
                resolved_output = output_file.resolve()
                resolved_root = root_dir.resolve()
                # Only an output file inside the root can show up in the tree.
                if resolved_output.is_relative_to(resolved_root):
                    relative_output_str = resolved_output.relative_to(resolved_root).as_posix()
                    # A leading "/" anchors the pattern to the root.
                    output_pattern = f"/{relative_output_str}"
                    # Avoid a duplicate if the custom file already lists it.
                    if output_pattern not in combined_patterns:
                        combined_patterns.append(output_pattern)
                        logging.debug(
                            f"Adding output file to ignores (no_default_ignores=True): {output_pattern}"
                        )
            except (OSError, RuntimeError, ValueError) as e:
                # Best effort: a path that cannot be resolved is simply not
                # added to the ignores.
                logging.warning(f"Could not determine relative path for output file {output_file}: {e}")
    else:
        combined_patterns = default_patterns + custom_patterns

    # Diagnostic logging.
    logging.debug(f"Ignore specs params: no_default_ignores={no_default_ignores}")
    logging.debug(f"Default patterns (used unless no_default_ignores): {default_patterns}")
    logging.debug(f"Custom patterns (-i): {custom_patterns}")
    logging.debug(f"Combined patterns for spec: {combined_patterns}")

    combined_spec = load_pathspec(combined_patterns)
    gitignore_specs = get_gitignore_specs(root_dir, no_default_ignores)

    return combined_spec, gitignore_specs
4278
def get_default_patterns(root_dir: Path, no_default_ignores: bool, output_file: Path | None) -> List[str]:
    """Retrieve the default ignore patterns.

    Args:
        root_dir: Root of the tree being mapped; ``.treemapperignore`` is
            looked up directly inside it.
        no_default_ignores: When true, no default patterns are produced.
        output_file: Optional output path. When it resolves to a location
            inside ``root_dir`` it is added as a root-anchored pattern.

    Returns:
        Patterns read from ``.treemapperignore`` followed, when applicable,
        by the pattern for the output file. An empty list when
        ``no_default_ignores`` is true.
    """
    if no_default_ignores:
        # Kept as a guard for direct callers.
        return []

    patterns: List[str] = []
    patterns.extend(read_ignore_file(root_dir / ".treemapperignore"))

    if not output_file:
        return patterns

    try:
        resolved_output = output_file.resolve()
        resolved_root = root_dir.resolve()
    except (OSError, RuntimeError, ValueError) as e:
        # Best effort: an unresolvable path is just not added to the ignores.
        logging.warning(f"Could not determine relative path for output file {output_file}: {e}")
        return patterns

    try:
        # relative_to raises ValueError when the output is outside the root.
        relative_output = resolved_output.relative_to(resolved_root)
    except ValueError:
        logging.debug(
            f"Output file {output_file} is outside root directory {root_dir}, not adding to default ignores."
        )
    else:
        # Ignore only the file itself (not its parent directory); the leading
        # "/" anchors the pattern to the root.
        output_pattern = f"/{relative_output.as_posix()}"
        patterns.append(output_pattern)
        logging.debug(f"Adding output file to default ignores: {output_pattern}")

    return patterns
67115
def get_custom_patterns(root_dir: Path, custom_ignore_file: Path | None) -> List[str]:
    """Retrieve custom ignore patterns from the file specified with -i.

    A relative ``custom_ignore_file`` is looked up relative to the current
    working directory first. If no file exists there, it is looked up
    relative to ``root_dir`` so that paths written relative to the mapped
    directory keep working.

    Args:
        root_dir: Root of the tree being mapped (fallback base directory).
        custom_ignore_file: Path of the user-supplied ignore file, or
            ``None`` when none was given.

    Returns:
        The patterns read from the file, or an empty list when no file was
        given or it could not be found (a warning is logged in that case).
    """
    if not custom_ignore_file:
        return []

    if custom_ignore_file.is_absolute():
        candidates = [custom_ignore_file]
    else:
        # NOTE(review): if the caller changes the working directory before
        # this runs, the first candidate follows the new CWD.
        candidates = [Path.cwd() / custom_ignore_file, root_dir / custom_ignore_file]

    for candidate in candidates:
        if candidate.is_file():
            return read_ignore_file(candidate)

    # Warn only here: the file was explicitly requested but not found.
    logging.warning(f"Custom ignore file '{candidates[0]}' not found.")
    return []
79135
def get_gitignore_specs(root_dir: Path, no_default_ignores: bool) -> Dict[Path, pathspec.PathSpec]:
    """Retrieve gitignore specs for all .gitignore files found within root_dir.

    Args:
        root_dir: Directory to walk.
        no_default_ignores: When true, no ``.gitignore`` file is loaded.

    Returns:
        A mapping from each directory containing a ``.gitignore`` with at
        least one pattern to the spec built from that file. Empty when
        ``no_default_ignores`` is true.
    """
    if no_default_ignores:
        return {}

    def _log_walk_error(error: OSError) -> None:
        # os.walk ignores directory listing errors unless given an onerror
        # callback; log them so unreadable directories are visible.
        logging.warning(f"Error walking directory {root_dir} to find .gitignore files: {error}")

    gitignore_specs = {}
    try:
        for dirpath_str, dirnames, filenames in os.walk(root_dir, topdown=True, onerror=_log_walk_error):
            # Every .gitignore found is loaded; directories ignored by a
            # parent spec are not pruned here. Only .git is skipped, by
            # editing dirnames in place (possible because topdown=True).
            if ".git" in dirnames:
                dirnames.remove(".git")

            if ".gitignore" not in filenames:
                continue

            patterns = read_ignore_file(Path(dirpath_str) / ".gitignore")
            if patterns:  # Skip files that contain no actual patterns.
                gitignore_specs[Path(dirpath_str)] = load_pathspec(patterns)
    except OSError as e:
        logging.warning(f"Error walking directory {root_dir} to find .gitignore files: {e}")

    return gitignore_specs
93161
94162
95-
def should_ignore(relative_path_str: str, combined_spec: pathspec.PathSpec) -> bool:
    """Check if a file or directory should be ignored based on combined pathspec.

    Only ``relative_path_str`` itself is matched; parent directories are not
    checked separately, to avoid over-ignoring.

    Args:
        relative_path_str: Path to test. NOTE(review): callers are expected
            to pass directories with a trailing ``/`` so that directory
            patterns match; confirm against the caller.
        combined_spec: Spec built from the combined ignore patterns.

    Returns:
        The result of ``combined_spec.match_file`` for the given path.
    """
    is_ignored = combined_spec.match_file(relative_path_str)

    # Lazy %-formatting: this runs for every path checked, so the message is
    # only built when debug logging is enabled.
    logging.debug("Checking combined spec ignore for '%s': %s", relative_path_str, is_ignored)
    return is_ignored
0 commit comments