Skip to content

Commit fc1920d

Browse files
ZAK1504raphael-intugle
authored and committed
Refactor _fetch_paths_recursively in DocsSearchService for readability
1 parent 707d742 commit fc1920d

File tree

1 file changed

+77
-23
lines changed

1 file changed

+77
-23
lines changed

src/intugle/mcp/docs_search/service.py

Lines changed: 77 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -69,40 +69,94 @@ async def list_doc_paths(self) -> List[str]:
6969
self._doc_paths = await self._fetch_paths_recursively(session, self.API_URL)
7070
return self._doc_paths
7171

72+
async def _get_github_api_items(self, session: aiohttp.ClientSession, url: str) -> List[dict]:
73+
"""
74+
Fetches items from the GitHub API.
75+
76+
Args:
77+
session (aiohttp.ClientSession): The HTTP session to use for the request.
78+
url (str): The GitHub API URL to fetch items from.
79+
80+
Returns:
81+
List[dict]: A list of items from the GitHub API response.
82+
83+
Raises:
84+
RuntimeError: If the API request fails or returns a non-200 status code.
85+
"""
86+
async with session.get(url) as response:
87+
if response.status != 200:
88+
raise RuntimeError(f"Could not fetch {url}, status code: {response.status}")
89+
return await response.json()
90+
91+
def _is_valid_markdown_file(self, item: dict) -> bool:
92+
"""
93+
Checks if an item is a valid markdown file that should be included.
94+
95+
Args:
96+
item (dict): A GitHub API item dictionary.
97+
98+
Returns:
99+
bool: True if the item is a valid markdown file not in blacklist, False otherwise.
100+
"""
101+
if item['type'] != 'file':
102+
return False
103+
104+
if not (item['name'].endswith('.md') or item['name'].endswith('.mdx')):
105+
return False
106+
107+
relative_path = item['path'].replace('docsite/docs/', '', 1)
108+
return relative_path not in self.BLACKLISTED_ROUTES
109+
110+
def _extract_relative_path(self, item: dict) -> str:
111+
"""
112+
Extracts the relative path from a GitHub API item.
113+
114+
Args:
115+
item (dict): A GitHub API item dictionary.
116+
117+
Returns:
118+
str: The relative path with 'docsite/docs/' prefix removed.
119+
"""
120+
return item['path'].replace('docsite/docs/', '', 1)
121+
122+
async def _process_github_item(self, session: aiohttp.ClientSession, item: dict) -> List[str]:
123+
"""
124+
Processes a single GitHub API item (file or directory).
125+
126+
Args:
127+
session (aiohttp.ClientSession): The HTTP session to use for recursive requests.
128+
item (dict): A GitHub API item dictionary.
129+
130+
Returns:
131+
List[str]: A list of file paths. Returns a single-item list for files,
132+
or recursively fetched paths for directories.
133+
"""
134+
if self._is_valid_markdown_file(item):
135+
return [self._extract_relative_path(item)]
136+
elif item['type'] == 'dir':
137+
return await self._fetch_paths_recursively(session, item['url'])
138+
return []
139+
72140
async def _fetch_paths_recursively(self, session: aiohttp.ClientSession, url: str) -> List[str]:
73141
"""
74-
Recursively fetches file paths from the GitHub API content endpoint.
142+
Recursively fetches file paths from the GitHub API.
75143
76144
Args:
77-
session (aiohttp.ClientSession): The active asynchronous HTTP session.
78-
url (str): The GitHub API URL for the directory content.
145+
session (aiohttp.ClientSession): The HTTP session to use for the request.
146+
url (str): The GitHub API URL to fetch paths from.
79147
80148
Returns:
81-
List[str]: A list of relative paths found under the given URL, or a list
82-
containing an error string if the fetch fails.
149+
List[str]: A list of relative documentation file paths, or error messages if the request fails.
83150
"""
84-
paths: List[str] = []
85151
try:
86-
async with session.get(url) as response:
87-
if response.status != 200:
88-
return [f"Error: Could not fetch {url}, status code: {response.status}"]
89-
90-
items: List[Dict[str, Any]] = await response.json()
91-
92-
for item in items:
93-
if item['type'] == 'file' and (item['name'].endswith('.md') or item['name'].endswith('.mdx')):
94-
# Strip the docsite/docs/ prefix to get the relative path
95-
relative_path: str = item['path'].replace('docsite/docs/', '', 1)
96-
if relative_path not in self.BLACKLISTED_ROUTES:
97-
paths.append(relative_path)
98-
elif item['type'] == 'dir':
99-
# Recursively fetch paths in subdirectories
100-
paths.extend(await self._fetch_paths_recursively(session, item['url']))
152+
items = await self._get_github_api_items(session, url)
153+
paths = []
154+
for item in items:
155+
paths.extend(await self._process_github_item(session, item))
156+
return paths
101157
except Exception as e:
102158
return [f"Error: Exception while fetching {url}: {e}"]
103159

104-
return paths
105-
106160
async def search_docs(self, paths: List[str]) -> str:
107161
"""
108162
Fetches and concatenates content from a list of documentation paths.

0 commit comments

Comments
 (0)