77
88def remover (html_content : str ) -> str :
99 """
10- This function processes HTML content, removes unnecessary tags,
11- minifies the HTML, and retrieves the title and body content.
10+ This function processes HTML content, removes unnecessary tags
11+ (including style tags), minifies the HTML, and retrieves the
12+ title and body content.
1213
1314 Parameters:
1415 html_content (str): The HTML content to parse
@@ -23,14 +24,16 @@ def remover(html_content: str) -> str:
2324 title_tag = soup .find ('title' )
2425 title = title_tag .get_text () if title_tag else ""
2526
26- # Script Tag Removal
27- [script .extract () for script in soup .find_all ('script' )]
27+ # Script and Style Tag Removal
28+ for tag in soup .find_all (['script' , 'style' ]):
29+ tag .extract ()
2830
2931 # Body Extraction (if it exists)
3032 body_content = soup .find ('body' )
3133 if body_content :
3234 # Minify the HTML within the body tag
3335 minimized_body = minify (str (body_content ))
34- return "Title: " + title + ", Body: " + minimized_body
36+ return "Title: " + title + ", Body: " + minimized_body
3537 else :
36- return "Title: " + title + ", Body: No body content found"
38+ return "Title: " + title + ", Body: No body content found"
39+
0 commit comments