@@ -69,40 +69,94 @@ async def list_doc_paths(self) -> List[str]:
6969 self ._doc_paths = await self ._fetch_paths_recursively (session , self .API_URL )
7070 return self ._doc_paths
7171
async def _get_github_api_items(self, session: aiohttp.ClientSession, url: str) -> List[dict]:
    """
    Fetches and decodes the JSON item listing served at a GitHub API URL.

    Args:
        session (aiohttp.ClientSession): The HTTP session to use for the request.
        url (str): The GitHub API URL to fetch items from.

    Returns:
        List[dict]: The decoded JSON array from the response body.

    Raises:
        RuntimeError: If the response status is not 200, or if the decoded
            body is not a JSON array.
    """
    async with session.get(url) as response:
        if response.status != 200:
            raise RuntimeError(f"Could not fetch {url}, status code: {response.status}")
        payload = await response.json()

    # Callers iterate the result and index every element by key, so any other
    # JSON shape (e.g. a single object) is rejected here with a descriptive
    # error instead of failing later with an unrelated TypeError.
    if not isinstance(payload, list):
        raise RuntimeError(
            f"Could not fetch {url}, expected a JSON list but got {type(payload).__name__}"
        )
    return payload
90+
def _is_valid_markdown_file(self, item: dict) -> bool:
    """
    Checks if an item is a markdown file that should be included.

    Args:
        item (dict): A GitHub API item dictionary. The 'type' key is always
            read; 'name' and 'path' are read only for items of type 'file'.

    Returns:
        bool: True if the item is a '.md' or '.mdx' file whose relative path
            is not listed in BLACKLISTED_ROUTES, False otherwise.
    """
    if item['type'] != 'file':
        return False

    # str.endswith accepts a tuple, so one call covers both extensions.
    if not item['name'].endswith(('.md', '.mdx')):
        return False

    # Reuse the shared helper so the blacklist is checked against exactly the
    # same relative path that is later reported for this item.
    return self._extract_relative_path(item) not in self.BLACKLISTED_ROUTES
109+
def _extract_relative_path(self, item: dict) -> str:
    """
    Extracts the relative path from a GitHub API item.

    Args:
        item (dict): A GitHub API item dictionary; only its 'path' key is read.

    Returns:
        str: The item's path with a leading 'docsite/docs/' removed. Paths that
            do not start with that prefix are returned unchanged.
    """
    prefix = 'docsite/docs/'
    path = item['path']
    # Strip only a true leading prefix: str.replace(..., 1) would also remove
    # the first occurrence found in the middle of an unrelated path.
    if path.startswith(prefix):
        return path[len(prefix):]
    return path
121+
async def _process_github_item(self, session: aiohttp.ClientSession, item: dict) -> List[str]:
    """
    Turns one GitHub API item into the list of paths it contributes.

    Args:
        session (aiohttp.ClientSession): The HTTP session passed on when a
            directory has to be traversed.
        item (dict): A GitHub API item dictionary.

    Returns:
        List[str]: A one-element list holding the relative path when the item
            is an accepted markdown file, whatever the recursive fetch of
            item['url'] returns when the item is a directory, and an empty
            list for anything else.
    """
    # Accepted markdown file: contributes exactly its own relative path.
    if self._is_valid_markdown_file(item):
        return [self._extract_relative_path(item)]

    # Neither an accepted file nor a directory: contributes nothing.
    if item['type'] != 'dir':
        return []

    # Directory: descend into its listing.
    nested_paths = await self._fetch_paths_recursively(session, item['url'])
    return nested_paths
139+
async def _fetch_paths_recursively(self, session: aiohttp.ClientSession, url: str) -> List[str]:
    """
    Recursively fetches file paths from the GitHub API.

    The listing at `url` is fetched once and each item is processed in order,
    one after another; the per-item results are concatenated.

    Args:
        session (aiohttp.ClientSession): The HTTP session to use for the request.
        url (str): The GitHub API URL to fetch paths from.

    Returns:
        List[str]: A list of relative documentation file paths. If fetching or
            processing raises, a single-element list holding an error message
            is returned instead; this method does not propagate exceptions
            derived from Exception.
    """
    try:
        items = await self._get_github_api_items(session, url)
        paths: List[str] = []
        for item in items:
            paths.extend(await self._process_github_item(session, item))
        return paths
    except Exception as e:
        # Deliberate best-effort: failures are reported in-band as a string so
        # that a broken listing does not abort the caller's traversal.
        return [f"Error: Exception while fetching {url}: {e}"]
105-
106160 async def search_docs (self , paths : List [str ]) -> str :
107161 """
108162 Fetches and concatenates content from a list of documentation paths.
0 commit comments