Skip to content

Commit 24184fc

Browse files
committed
refactor: streamline query processing by removing unnecessary cloning logic and enhancing ignore pattern handling
1 parent 6dafe76 commit 24184fc

File tree

2 files changed

+225
-65
lines changed

2 files changed

+225
-65
lines changed

src/gitingest/query_parser.py

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from gitingest.config import MAX_FILE_SIZE, TMP_BASE_PATH
1313
from gitingest.exceptions import InvalidPatternError
1414
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS
15-
from gitingest.repository_clone import CloneConfig, _check_repo_exists, clone_repo, fetch_remote_branch_list
15+
from gitingest.repository_clone import _check_repo_exists, fetch_remote_branch_list
1616

1717
HEX_DIGITS: set[str] = set(string.hexdigits)
1818

@@ -113,23 +113,13 @@ async def parse_query(
113113
# Local path scenario
114114
parsed_query = _parse_path(source)
115115

116-
# Clone the repository if it's a URL
117-
if parsed_query.url:
118-
clone_config = CloneConfig(
119-
url=parsed_query.url,
120-
local_path=str(parsed_query.local_path),
121-
commit=parsed_query.commit,
122-
branch=parsed_query.branch,
123-
)
124-
await clone_repo(clone_config)
125-
126-
# Look for .gitingestignore file in the cloned repository
127-
ignore_file_path = Path(parsed_query.local_path) / ".gitingestignore"
128-
additional_ignore_patterns = parse_ignore_file(ignore_file_path)
129-
if ignore_patterns:
130-
ignore_patterns.update(additional_ignore_patterns)
131-
else:
132-
ignore_patterns = additional_ignore_patterns
116+
# Look for .gitingestignore file in the local path
117+
ignore_file_path = Path(parsed_query.local_path) / ".gitingestignore"
118+
additional_ignore_patterns = parse_ignore_file(ignore_file_path)
119+
if ignore_patterns:
120+
ignore_patterns.update(additional_ignore_patterns)
121+
else:
122+
ignore_patterns = additional_ignore_patterns
133123

134124
# Combine default ignore patterns + custom patterns
135125
ignore_patterns_set = DEFAULT_IGNORE_PATTERNS.copy()

src/server/query_processor.py

Lines changed: 217 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" Process a query by parsing input, cloning a repository, and generating a summary. """
22

3+
import os
34
from functools import partial
45

56
from fastapi import Request
@@ -46,43 +47,23 @@ async def process_query(
4647
_TemplateResponse
4748
Rendered template response containing the processed results or an error message.
4849
49-
Raises
50-
------
51-
ValueError
52-
If an invalid pattern type is provided.
5350
"""
54-
if pattern_type == "include":
55-
include_patterns = pattern
56-
exclude_patterns = None
57-
elif pattern_type == "exclude":
58-
exclude_patterns = pattern
59-
include_patterns = None
60-
else:
61-
raise ValueError(f"Invalid pattern type: {pattern_type}")
62-
51+
include_patterns, exclude_patterns = validate_pattern_type(pattern_type, pattern)
6352
template = "index.jinja" if is_index else "git.jinja"
6453
template_response = partial(templates.TemplateResponse, name=template)
6554
max_file_size = log_slider_to_size(slider_position)
6655

67-
context = {
68-
"request": request,
69-
"repo_url": input_text,
70-
"examples": EXAMPLE_REPOS if is_index else [],
71-
"default_file_size": slider_position,
72-
"pattern_type": pattern_type,
73-
"pattern": pattern,
74-
}
56+
context = create_context(request, input_text, slider_position, pattern_type, pattern, is_index)
7557

7658
try:
77-
parsed_query: ParsedQuery = await parse_query(
59+
parsed_query = await parse_query(
7860
source=input_text,
7961
max_file_size=max_file_size,
8062
from_web=True,
8163
include_patterns=include_patterns,
8264
ignore_patterns=exclude_patterns,
8365
)
84-
if not parsed_query.url:
85-
raise ValueError("The 'url' parameter is required.")
66+
validate_parsed_query(parsed_query)
8667

8768
clone_config = CloneConfig(
8869
url=parsed_query.url,
@@ -91,25 +72,18 @@ async def process_query(
9172
branch=parsed_query.branch,
9273
)
9374
await clone_repo(clone_config)
75+
76+
update_ignore_patterns(parsed_query, clone_config.local_path)
77+
9478
summary, tree, content = run_ingest_query(parsed_query)
95-
with open(f"{clone_config.local_path}.txt", "w", encoding="utf-8") as f:
96-
f.write(tree + "\n" + content)
79+
save_ingest_result(clone_config.local_path, tree, content)
80+
content = filter_ignored_files(parsed_query, content)
81+
9782
except Exception as e:
98-
# hack to print error message when query is not defined
99-
if "query" in locals() and parsed_query is not None and isinstance(parsed_query, dict):
100-
_print_error(parsed_query["url"], e, max_file_size, pattern_type, pattern)
101-
else:
102-
print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="")
103-
print(f"{Colors.RED}{e}{Colors.END}")
104-
105-
context["error_message"] = f"Error: {e}"
83+
handle_query_error(e, parsed_query, max_file_size, pattern_type, pattern, context)
10684
return template_response(context=context)
10785

108-
if len(content) > MAX_DISPLAY_SIZE:
109-
content = (
110-
f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, "
111-
"download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE]
112-
)
86+
content = truncate_content(content)
11387

11488
_print_success(
11589
url=parsed_query.url,
@@ -132,10 +106,207 @@ async def process_query(
132106
return template_response(context=context)
133107

134108

109+
def validate_pattern_type(pattern_type: str, pattern: str):
    """
    Map a pattern onto the include/exclude slot selected by ``pattern_type``.

    Parameters
    ----------
    pattern_type : str
        Which slot the pattern belongs to: "include" or "exclude".
    pattern : str
        The pattern string supplied by the user.

    Returns
    -------
    tuple
        ``(include_patterns, exclude_patterns)``. The slot named by ``pattern_type`` holds
        ``pattern``; the other slot is ``None``.

    Raises
    ------
    ValueError
        If ``pattern_type`` is neither "include" nor "exclude".
    """
    # Reject unknown types up front so the happy path is a single expression.
    if pattern_type not in ("include", "exclude"):
        raise ValueError(f"Invalid pattern type: {pattern_type}")

    return (pattern, None) if pattern_type == "include" else (None, pattern)
135+
136+
137+
def create_context(
    request: Request, input_text: str, slider_position: int, pattern_type: str, pattern: str, is_index: bool
) -> dict:
    """
    Build the template context shared by the success and error responses.

    Parameters
    ----------
    request : Request
        The HTTP request object.
    input_text : str
        The raw text the user submitted; stored under the "repo_url" key.
    slider_position : int
        The slider position; stored under the "default_file_size" key.
    pattern_type : str
        Type of pattern in use, either "include" or "exclude".
    pattern : str
        The pattern string to include or exclude.
    is_index : bool
        Whether the request targets the index page. Only the index page receives the example list.

    Returns
    -------
    dict
        The context dictionary to hand to the template renderer.
    """
    # Non-index pages get an empty example list.
    examples = EXAMPLE_REPOS if is_index else []

    context = {}
    context["request"] = request
    context["repo_url"] = input_text
    context["examples"] = examples
    context["default_file_size"] = slider_position
    context["pattern_type"] = pattern_type
    context["pattern"] = pattern
    return context
171+
172+
173+
def validate_parsed_query(parsed_query: ParsedQuery):
    """
    Ensure the parsed query carries a non-empty URL.

    Parameters
    ----------
    parsed_query : ParsedQuery
        The parsed query object; only its ``url`` attribute is inspected.

    Raises
    ------
    ValueError
        If ``parsed_query.url`` is missing or empty.
    """
    if parsed_query.url:
        return

    raise ValueError("The 'url' parameter is required.")
189+
190+
191+
def update_ignore_patterns(parsed_query: ParsedQuery, local_path: str) -> None:
    """
    Merge patterns from a `.gitingestignore` file into the query's ignore patterns.

    The file is looked up directly inside ``local_path``. Blank lines and comment lines
    (first non-whitespace character is ``#``) are skipped; every other line is stripped of
    surrounding whitespace and added as a pattern. When the file is absent or yields no
    patterns, ``parsed_query`` is left untouched.

    Parameters
    ----------
    parsed_query : ParsedQuery
        The parsed query object; its ``ignore_patterns`` attribute is updated in place
        (and created as a set when it is empty or ``None``).
    local_path : str
        The directory that is searched for the `.gitingestignore` file.
    """
    ignore_file_path = os.path.join(local_path, ".gitingestignore")

    # Nothing to merge when the file is absent (or the name refers to a directory).
    if not os.path.isfile(ignore_file_path):
        return

    with open(ignore_file_path, encoding="utf-8") as ignore_file:
        stripped_lines = (line.strip() for line in ignore_file)
        # Test the comment marker on the stripped text so indented comments are skipped too.
        additional_ignore_patterns = [
            entry for entry in stripped_lines if entry and not entry.startswith("#")
        ]

    if additional_ignore_patterns:
        parsed_query.ignore_patterns = parsed_query.ignore_patterns or set()
        parsed_query.ignore_patterns.update(additional_ignore_patterns)
212+
213+
214+
def save_ingest_result(local_path: str, tree: str, content: str):
    """
    Write the ingest output to a text file next to the clone directory.

    The destination is ``local_path`` with a ``.txt`` suffix appended; an existing file is
    overwritten. The file holds the tree, a newline, then the content.

    Parameters
    ----------
    local_path : str
        Path of the cloned repository; used as the stem of the output file name.
    tree : str
        The repository tree structure.
    content : str
        The ingested file content.
    """
    output_path = f"{local_path}.txt"
    payload = "\n".join((tree, content))

    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.write(payload)
229+
230+
231+
def filter_ignored_files(parsed_query: ParsedQuery, content: str) -> str:
    """
    Drop every content line that contains one of the query's ignore patterns.

    Parameters
    ----------
    parsed_query : ParsedQuery
        The parsed query object; only its ``ignore_patterns`` attribute is read.
    content : str
        The content to be filtered.

    Returns
    -------
    str
        ``content`` unchanged when there are no usable patterns; otherwise the surviving
        lines re-joined with ``"\\n"`` (line endings are normalised and a trailing newline
        is not preserved, because the text goes through ``splitlines``).
    """
    # An empty pattern is a substring of every line and would erase the whole content,
    # so empty entries are discarded before filtering.
    patterns = tuple(entry for entry in (parsed_query.ignore_patterns or ()) if entry)
    if not patterns:
        return content

    # NOTE(review): patterns are applied as plain substrings of each content line, not as
    # globs against file paths -- confirm this is the intended semantics.
    return "\n".join(
        line for line in content.splitlines() if not any(entry in line for entry in patterns)
    )
254+
255+
256+
def handle_query_error(
    e: Exception, parsed_query: ParsedQuery, max_file_size: int, pattern_type: str, pattern: str, context: dict
):
    """
    Log a query-processing failure and record it in the template context.

    When a URL can be recovered from ``parsed_query`` the detailed error line is printed via
    ``_print_error``; otherwise a short warning containing only the exception is printed.
    In both cases ``context["error_message"]`` is set.

    Parameters
    ----------
    e : Exception
        The exception raised during processing.
    parsed_query : ParsedQuery
        The parsed query object. May be ``None`` (or lack a URL) when the failure happened
        before or during parsing; a plain dict with a "url" key is also accepted.
    max_file_size : int
        The maximum file size allowed for the query, in bytes.
    pattern_type : str
        Specifies the type of pattern used.
    pattern : str
        The actual pattern string used.
    context : dict
        The template context dictionary; mutated in place.
    """
    # The URL is read as an attribute of the query object; dicts are tolerated for callers
    # that still pass the older mapping form.
    if isinstance(parsed_query, dict):
        url = parsed_query.get("url")
    else:
        url = getattr(parsed_query, "url", None)

    if url:
        _print_error(url, e, max_file_size, pattern_type, pattern)
    else:
        print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}{Colors.RED}{e}{Colors.END}")

    context["error_message"] = f"Error: {e}"
283+
284+
285+
def truncate_content(content: str) -> str:
    """
    Crop content that is longer than ``MAX_DISPLAY_SIZE`` characters.

    Parameters
    ----------
    content : str
        The content to be truncated.

    Returns
    -------
    str
        ``content`` unchanged when it fits; otherwise a one-line cropping notice followed by
        the first ``MAX_DISPLAY_SIZE`` characters of ``content``.
    """
    if len(content) <= MAX_DISPLAY_SIZE:
        return content

    limit_in_thousands = int(MAX_DISPLAY_SIZE / 1_000)
    notice = (
        f"(Files content cropped to {limit_in_thousands}k characters, "
        "download full ingest to see more)\n"
    )
    return notice + content[:MAX_DISPLAY_SIZE]
305+
306+
135307
def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None:
136308
"""
137-
Print a formatted summary of the query details, including the URL, file size,
138-
and pattern information, for easier debugging or logging.
309+
Print a formatted summary of the query details.
139310
140311
Parameters
141312
----------
@@ -151,16 +322,16 @@ def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str)
151322
print(f"{Colors.WHITE}{url:<20}{Colors.END}", end="")
152323
if int(max_file_size / 1024) != 50:
153324
print(f" | {Colors.YELLOW}Size: {int(max_file_size/1024)}kb{Colors.END}", end="")
154-
if pattern_type == "include" and pattern != "":
325+
if pattern_type == "include" and pattern:
155326
print(f" | {Colors.YELLOW}Include {pattern}{Colors.END}", end="")
156-
elif pattern_type == "exclude" and pattern != "":
327+
elif pattern_type == "exclude" and pattern:
157328
print(f" | {Colors.YELLOW}Exclude {pattern}{Colors.END}", end="")
329+
print()
158330

159331

160332
def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str, pattern: str) -> None:
161333
"""
162-
Print a formatted error message including the URL, file size, pattern details, and the exception encountered,
163-
for debugging or logging purposes.
334+
Print a formatted error message including details of the exception.
164335
165336
Parameters
166337
----------
@@ -182,8 +353,7 @@ def _print_error(url: str, e: Exception, max_file_size: int, pattern_type: str,
182353

183354
def _print_success(url: str, max_file_size: int, pattern_type: str, pattern: str, summary: str) -> None:
184355
"""
185-
Print a formatted success message, including the URL, file size, pattern details, and a summary with estimated
186-
tokens, for debugging or logging purposes.
356+
Print a formatted success message, including estimated tokens.
187357
188358
Parameters
189359
----------

0 commit comments

Comments
 (0)