We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 9a67a26 + ea3b545 commit dd29c16Copy full SHA for dd29c16
scrapegraphai/utils/cleanup_html.py
@@ -5,6 +5,7 @@
5
from minify_html import minify
6
from urllib.parse import urljoin
7
8
+
9
def cleanup_html(html_content: str, base_url: str) -> str:
10
"""
11
Processes HTML content by removing unnecessary tags, minifying the HTML, and extracting the title and body content.
@@ -47,5 +48,4 @@ def cleanup_html(html_content: str, base_url: str) -> str:
47
48
minimized_body = minify(str(body_content))
49
return "Title: " + title + ", Body: " + minimized_body + ", Links: " + str(link_urls)
50
-
51
- return "Title: " + title + ", Body: No body content found" + ", Links: " + str(link_urls)
+ return "Title: " + title + ", Body: No body content found" + ", Links: " + str(link_urls)
0 commit comments