11""" Process a query by parsing input, cloning a repository, and generating a summary. """
22
3+ import os
34from functools import partial
45
56from fastapi import Request
@@ -46,43 +47,23 @@ async def process_query(
4647 _TemplateResponse
4748 Rendered template response containing the processed results or an error message.
4849
49- Raises
50- ------
51- ValueError
52- If an invalid pattern type is provided.
5350 """
54- if pattern_type == "include" :
55- include_patterns = pattern
56- exclude_patterns = None
57- elif pattern_type == "exclude" :
58- exclude_patterns = pattern
59- include_patterns = None
60- else :
61- raise ValueError (f"Invalid pattern type: { pattern_type } " )
62-
51+ include_patterns , exclude_patterns = validate_pattern_type (pattern_type , pattern )
6352 template = "index.jinja" if is_index else "git.jinja"
6453 template_response = partial (templates .TemplateResponse , name = template )
6554 max_file_size = log_slider_to_size (slider_position )
6655
67- context = {
68- "request" : request ,
69- "repo_url" : input_text ,
70- "examples" : EXAMPLE_REPOS if is_index else [],
71- "default_file_size" : slider_position ,
72- "pattern_type" : pattern_type ,
73- "pattern" : pattern ,
74- }
56+ context = create_context (request , input_text , slider_position , pattern_type , pattern , is_index )
7557
7658 try :
77- parsed_query : ParsedQuery = await parse_query (
59+ parsed_query = await parse_query (
7860 source = input_text ,
7961 max_file_size = max_file_size ,
8062 from_web = True ,
8163 include_patterns = include_patterns ,
8264 ignore_patterns = exclude_patterns ,
8365 )
84- if not parsed_query .url :
85- raise ValueError ("The 'url' parameter is required." )
66+ validate_parsed_query (parsed_query )
8667
8768 clone_config = CloneConfig (
8869 url = parsed_query .url ,
@@ -91,25 +72,18 @@ async def process_query(
9172 branch = parsed_query .branch ,
9273 )
9374 await clone_repo (clone_config )
75+
76+ update_ignore_patterns (parsed_query , clone_config .local_path )
77+
9478 summary , tree , content = run_ingest_query (parsed_query )
95- with open (f"{ clone_config .local_path } .txt" , "w" , encoding = "utf-8" ) as f :
96- f .write (tree + "\n " + content )
79+ save_ingest_result (clone_config .local_path , tree , content )
80+ content = filter_ignored_files (parsed_query , content )
81+
9782 except Exception as e :
98- # hack to print error message when query is not defined
99- if "query" in locals () and parsed_query is not None and isinstance (parsed_query , dict ):
100- _print_error (parsed_query ["url" ], e , max_file_size , pattern_type , pattern )
101- else :
102- print (f"{ Colors .BROWN } WARN{ Colors .END } : { Colors .RED } <- { Colors .END } " , end = "" )
103- print (f"{ Colors .RED } { e } { Colors .END } " )
104-
105- context ["error_message" ] = f"Error: { e } "
83+ handle_query_error (e , parsed_query , max_file_size , pattern_type , pattern , context )
10684 return template_response (context = context )
10785
108- if len (content ) > MAX_DISPLAY_SIZE :
109- content = (
110- f"(Files content cropped to { int (MAX_DISPLAY_SIZE / 1_000 )} k characters, "
111- "download full ingest to see more)\n " + content [:MAX_DISPLAY_SIZE ]
112- )
86+ content = truncate_content (content )
11387
11488 _print_success (
11589 url = parsed_query .url ,
@@ -132,10 +106,207 @@ async def process_query(
132106 return template_response (context = context )
133107
134108
def validate_pattern_type(pattern_type: str, pattern: str):
    """
    Map a pattern type onto an ``(include_patterns, exclude_patterns)`` pair.

    Parameters
    ----------
    pattern_type : str
        Which slot ``pattern`` belongs in: "include" or "exclude".
    pattern : str
        The pattern string supplied with the request.

    Returns
    -------
    tuple
        ``(pattern, None)`` for "include" and ``(None, pattern)`` for "exclude".

    Raises
    ------
    ValueError
        If ``pattern_type`` is neither "include" nor "exclude".
    """
    # Reject anything outside the two supported modes before building the pair.
    if pattern_type not in ("include", "exclude"):
        raise ValueError(f"Invalid pattern type: {pattern_type}")

    is_include = pattern_type == "include"
    return (pattern, None) if is_include else (None, pattern)
135+
136+
def create_context(
    request: Request, input_text: str, slider_position: int, pattern_type: str, pattern: str, is_index: bool
) -> dict:
    """
    Build the base template context shared by the success and error renderings.

    Parameters
    ----------
    request : Request
        The HTTP request object, passed through to the template.
    input_text : str
        The user-provided input text, exposed to the template as ``repo_url``.
    slider_position : int
        The slider position, exposed to the template as ``default_file_size``.
    pattern_type : str
        Type of pattern in use, either "include" or "exclude".
    pattern : str
        The pattern string to include or exclude.
    is_index : bool
        When true, the example repositories are added to the context; otherwise an empty list is used.

    Returns
    -------
    dict
        The context dictionary with keys ``request``, ``repo_url``, ``examples``,
        ``default_file_size``, ``pattern_type`` and ``pattern``.
    """
    # Example repositories are only offered on the index page.
    examples = EXAMPLE_REPOS if is_index else []

    return dict(
        request=request,
        repo_url=input_text,
        examples=examples,
        default_file_size=slider_position,
        pattern_type=pattern_type,
        pattern=pattern,
    )
171+
172+
def validate_parsed_query(parsed_query: ParsedQuery):
    """
    Ensure the parsed query carries a non-empty URL.

    Parameters
    ----------
    parsed_query : ParsedQuery
        The parsed query object; only its ``url`` attribute is inspected.

    Raises
    ------
    ValueError
        If ``parsed_query.url`` is missing or otherwise falsy.
    """
    if parsed_query.url:
        return
    raise ValueError("The 'url' parameter is required.")
189+
190+
def update_ignore_patterns(parsed_query: ParsedQuery, local_path: str):
    """
    Merge patterns from a ``.gitingestignore`` file into the query's ignore patterns.

    The file is looked up directly inside ``local_path``. Each line is stripped of
    surrounding whitespace; blank lines and lines whose first non-blank character is
    ``#`` are skipped. If no regular file of that name exists, or it yields no patterns,
    ``parsed_query`` is left untouched.

    Parameters
    ----------
    parsed_query : ParsedQuery
        The parsed query object; its ``ignore_patterns`` attribute is updated in place
        (and created as a set when it is empty or ``None``).
    local_path : str
        The local path where the repository is cloned.
    """
    ignore_file_path = os.path.join(local_path, ".gitingestignore")

    # isfile (rather than exists) so a directory with this name is skipped instead of failing in open().
    if not os.path.isfile(ignore_file_path):
        return

    with open(ignore_file_path, encoding="utf-8") as ignore_file:
        stripped_lines = (line.strip() for line in ignore_file)
        # Test for comments on the stripped text so indented "# ..." lines are not taken as patterns.
        additional_ignore_patterns = [
            entry for entry in stripped_lines if entry and not entry.startswith("#")
        ]

    if additional_ignore_patterns:
        parsed_query.ignore_patterns = parsed_query.ignore_patterns or set()
        parsed_query.ignore_patterns.update(additional_ignore_patterns)
212+
213+
def save_ingest_result(local_path: str, tree: str, content: str):
    """
    Write the ingest output next to the cloned repository.

    The output file is ``local_path`` with ``.txt`` appended; it receives the tree,
    a newline, then the content, encoded as UTF-8. An existing file is overwritten.

    Parameters
    ----------
    local_path : str
        The local path where the repository is cloned.
    tree : str
        The repository tree structure.
    content : str
        The ingested file content.
    """
    output_path = f"{local_path}.txt"
    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.write("\n".join((tree, content)))
229+
230+
def filter_ignored_files(parsed_query: ParsedQuery, content: str) -> str:
    """
    Drop every line of ``content`` that contains one of the query's ignore patterns.

    Parameters
    ----------
    parsed_query : ParsedQuery
        The parsed query object; its ``ignore_patterns`` attribute supplies the patterns.
    content : str
        The content to be filtered.

    Returns
    -------
    str
        ``content`` unchanged when there are no usable patterns; otherwise the
        remaining lines re-joined with ``"\\n"`` (so a trailing newline is not kept).
    """
    # An empty pattern is a substring of every line and would erase the whole content, so skip falsy entries.
    patterns = tuple(pattern for pattern in (parsed_query.ignore_patterns or ()) if pattern)
    if not patterns:
        return content

    # NOTE(review): this is plain substring containment on each line, not glob matching against
    # file paths -- confirm that is the intended semantics for patterns such as "*.log".
    return "\n".join(
        line for line in content.splitlines() if not any(ignored in line for ignored in patterns)
    )
254+
255+
def handle_query_error(
    e: Exception, parsed_query: ParsedQuery, max_file_size: int, pattern_type: str, pattern: str, context: dict
):
    """
    Report a query-processing failure on stdout and record it in the template context.

    When a URL can be read from ``parsed_query`` the detailed ``_print_error`` report is
    used; otherwise a short warning line containing only the exception is printed. In
    both cases ``context["error_message"]`` is set to ``"Error: <exception>"``.

    Parameters
    ----------
    e : Exception
        The exception raised during processing.
    parsed_query : ParsedQuery
        The parsed query object. May be ``None`` (or lack a URL) if the failure
        happened before parsing completed; a plain dict with a ``"url"`` key is
        also accepted.
    max_file_size : int
        The maximum file size allowed for the query, in bytes.
    pattern_type : str
        Specifies the type of pattern used.
    pattern : str
        The actual pattern string used.
    context : dict
        The template context dictionary; mutated in place.
    """
    # The earlier guard tested `"query" in locals()`, a leftover from when this logic lived inline
    # in the caller. No local of that name exists here, so the detailed branch could never run.
    if isinstance(parsed_query, dict):
        url = parsed_query.get("url")
    else:
        url = getattr(parsed_query, "url", None)

    if url:
        _print_error(url, e, max_file_size, pattern_type, pattern)
    else:
        print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}{Colors.RED}{e}{Colors.END}")

    context["error_message"] = f"Error: {e}"
283+
284+
def truncate_content(content: str) -> str:
    """
    Crop content to ``MAX_DISPLAY_SIZE`` characters for display.

    Parameters
    ----------
    content : str
        The content to be truncated.

    Returns
    -------
    str
        ``content`` unchanged when it fits; otherwise a one-line notice followed by
        the first ``MAX_DISPLAY_SIZE`` characters of ``content``.
    """
    if len(content) <= MAX_DISPLAY_SIZE:
        return content

    notice = (
        f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, "
        "download full ingest to see more)\n"
    )
    return notice + content[:MAX_DISPLAY_SIZE]
305+
306+
135307def _print_query (url : str , max_file_size : int , pattern_type : str , pattern : str ) -> None :
136308 """
137- Print a formatted summary of the query details, including the URL, file size,
138- and pattern information, for easier debugging or logging.
309+ Print a formatted summary of the query details.
139310
140311 Parameters
141312 ----------
@@ -151,16 +322,16 @@ def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str)
151322 print (f"{ Colors .WHITE } { url :<20} { Colors .END } " , end = "" )
152323 if int (max_file_size / 1024 ) != 50 :
153324 print (f" | { Colors .YELLOW } Size: { int (max_file_size / 1024 )} kb{ Colors .END } " , end = "" )
154- if pattern_type == "include" and pattern != "" :
325+ if pattern_type == "include" and pattern :
155326 print (f" | { Colors .YELLOW } Include { pattern } { Colors .END } " , end = "" )
156- elif pattern_type == "exclude" and pattern != "" :
327+ elif pattern_type == "exclude" and pattern :
157328 print (f" | { Colors .YELLOW } Exclude { pattern } { Colors .END } " , end = "" )
329+ print ()
158330
159331
160332def _print_error (url : str , e : Exception , max_file_size : int , pattern_type : str , pattern : str ) -> None :
161333 """
162- Print a formatted error message including the URL, file size, pattern details, and the exception encountered,
163- for debugging or logging purposes.
334+ Print a formatted error message including details of the exception.
164335
165336 Parameters
166337 ----------
@@ -182,8 +353,7 @@ def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str,
182353
183354def _print_success (url : str , max_file_size : int , pattern_type : str , pattern : str , summary : str ) -> None :
184355 """
185- Print a formatted success message, including the URL, file size, pattern details, and a summary with estimated
186- tokens, for debugging or logging purposes.
356+ Print a formatted success message, including estimated tokens.
187357
188358 Parameters
189359 ----------
0 commit comments