1212from gitingest .config import MAX_FILE_SIZE , TMP_BASE_PATH
1313from gitingest .exceptions import InvalidPatternError
1414from gitingest .ignore_patterns import DEFAULT_IGNORE_PATTERNS
15- from gitingest .repository_clone import _check_repo_exists , fetch_remote_branch_list
15+ from gitingest .repository_clone import CloneConfig , _check_repo_exists , clone_repo , fetch_remote_branch_list
1616
1717HEX_DIGITS : set [str ] = set (string .hexdigits )
1818
@@ -48,6 +48,30 @@ class ParsedQuery: # pylint: disable=too-many-instance-attributes
4848 pattern_type : str | None = None
4949
5050
def parse_ignore_file(ignore_file_path: Path) -> set[str]:
    """
    Parse the .gitingestignore file and return a set of patterns to ignore.

    Each line is stripped of surrounding whitespace. Blank lines and comment
    lines (lines whose first non-whitespace character is ``#``) are skipped.

    Parameters
    ----------
    ignore_file_path : Path
        Path to the .gitingestignore file

    Returns
    -------
    set[str]
        Set of patterns to ignore. Empty if the path does not exist or is not
        a regular file.
    """
    # ``is_file`` (rather than ``exists``) also covers the case where the path
    # is a directory, which would otherwise make ``open`` raise.
    if not ignore_file_path.is_file():
        return set()

    with open(ignore_file_path, encoding="utf-8") as f:
        # Strip first, then test for comments, so that indented comment lines
        # such as "   # note" are not mistaken for patterns.
        stripped_lines = (line.strip() for line in f)
        return {line for line in stripped_lines if line and not line.startswith("#")}
73+
74+
5175async def parse_query (
5276 source : str ,
5377 max_file_size : int ,
@@ -89,6 +113,24 @@ async def parse_query(
89113 # Local path scenario
90114 parsed_query = _parse_path (source )
91115
116+ # Clone the repository if it's a URL
117+ if parsed_query .url :
118+ clone_config = CloneConfig (
119+ url = parsed_query .url ,
120+ local_path = str (parsed_query .local_path ),
121+ commit = parsed_query .commit ,
122+ branch = parsed_query .branch ,
123+ )
124+ await clone_repo (clone_config )
125+
126+ # Look for .gitingestignore file in the cloned repository
127+ ignore_file_path = Path (parsed_query .local_path ) / ".gitingestignore"
128+ additional_ignore_patterns = parse_ignore_file (ignore_file_path )
129+ if ignore_patterns :
130+ ignore_patterns .update (additional_ignore_patterns )
131+ else :
132+ ignore_patterns = additional_ignore_patterns
133+
92134 # Combine default ignore patterns + custom patterns
93135 ignore_patterns_set = DEFAULT_IGNORE_PATTERNS .copy ()
94136 if ignore_patterns :
@@ -283,17 +325,18 @@ def _normalize_pattern(pattern: str) -> str:
283325 return pattern
284326
285327
286- def _parse_patterns (pattern : set [str ] | str ) -> set [str ]:
328+ def _parse_patterns (patterns : tuple [ str , ...] | set [str ] | str ) -> set [str ]:
287329 """
288330 Parse and validate file/directory patterns for inclusion or exclusion.
289331
290- Takes either a single pattern string or set of pattern strings and processes them into a normalized list.
291- Patterns are split on commas and spaces, validated for allowed characters, and normalized.
332+ Takes either a single pattern string, a tuple of pattern strings, or a set of pattern strings
333+ and processes them into a normalized list. Patterns are split on commas and spaces, validated
334+ for allowed characters, and normalized.
292335
293336 Parameters
294337 ----------
295- pattern : set[str] | str
296- Pattern(s) to parse - either a single string or set of strings
338+ patterns : tuple[str, ...] | set[str] | str
339+ Pattern(s) to parse - either a single string, a tuple of strings, or a set of strings
297340
298341 Returns
299342 -------
@@ -307,7 +350,11 @@ def _parse_patterns(pattern: set[str] | str) -> set[str]:
307350 dash (-), underscore (_), dot (.), forward slash (/), plus (+), and
308351 asterisk (*) are allowed.
309352 """
310- patterns = pattern if isinstance (pattern , set ) else {pattern }
353+ # Convert patterns to a set if it's not already a set
354+ if isinstance (patterns , tuple ):
355+ patterns = set (patterns )
356+ elif isinstance (patterns , str ):
357+ patterns = {patterns }
311358
312359 parsed_patterns : set [str ] = set ()
313360 for p in patterns :
0 commit comments