@@ -53,7 +53,8 @@ def _metadata_extractor(
5353class RecursiveUrlLoader(BaseLoader):
5454 """Recursively load all child links from a root URL.
5555
56- **Security Note**: This loader is a crawler that will start crawling
56+ **Security Note**:
57+ This loader is a crawler that will start crawling
5758 at a given URL and then expand to crawl child links recursively.
5859
5960 Web crawlers should generally NOT be deployed with network access
@@ -154,36 +155,36 @@ class RecursiveUrlLoader(BaseLoader):
154155 content. To parse this HTML into a more human/LLM-friendly format you can pass
155156 in a custom ``extractor`` method:
156157
157- .. code-block:: python
158+ .. code-block:: python
158159
159- # This example uses `beautifulsoup4` and `lxml`
160- import re
161- from bs4 import BeautifulSoup
160+ # This example uses `beautifulsoup4` and `lxml`
161+ import re
162+ from bs4 import BeautifulSoup
162163
163- def bs4_extractor(html: str) -> str:
164- soup = BeautifulSoup(html, "lxml")
165- return re.sub(r"\n\n+", "\n\n", soup.text).strip()
164+ def bs4_extractor(html: str) -> str:
165+ soup = BeautifulSoup(html, "lxml")
166+ return re.sub(r"\\n\\n+", "\\n\\n", soup.text).strip()
166167
167- loader = RecursiveUrlLoader(
168- "https://docs.python.org/3.9/",
169- extractor=bs4_extractor,
170- )
171- print(loader.load()[0].page_content[:200])
168+ loader = RecursiveUrlLoader(
169+ "https://docs.python.org/3.9/",
170+ extractor=bs4_extractor,
171+ )
172+ print(loader.load()[0].page_content[:200])
172173
173174
174- .. code-block:: python
175+ .. code-block:: python
175176
176- 3.9.19 Documentation
177+ 3.9.19 Documentation
177178
178- Download
179- Download these documents
180- Docs by version
179+ Download
180+ Download these documents
181+ Docs by version
181182
182- Python 3.13 (in development)
183- Python 3.12 (stable)
184- Python 3.11 (security-fixes)
185- Python 3.10 (security-fixes)
186- Python 3.9 (securit
183+ Python 3.13 (in development)
184+ Python 3.12 (stable)
185+ Python 3.11 (security-fixes)
186+ Python 3.10 (security-fixes)
187+ Python 3.9 (securit
187188
188189 Metadata extraction:
189190 Similarly to content extraction, you can specify a metadata extraction function
0 commit comments