|
1 | | -""" |
2 | | -Module for removing the unused html tags |
| 1 | +""" |
| 2 | +Module for extracting the title and a minified, script-free body from HTML |
3 | 3 | """ |
4 | 4 | from bs4 import BeautifulSoup |
| 5 | +from minify_html import minify |
5 | 6 |
|
6 | 7 |
|
def remover(html_content: str) -> str:
    """
    Extract the title and a minified, script-free body from HTML content.

    The content is parsed with BeautifulSoup's 'html.parser'. Every
    <script> tag is removed from the parsed tree, and the <body> element
    (if one exists) is serialized and passed through ``minify`` from
    ``minify_html``.

    Parameters:
        html_content (str): The HTML content to parse

    Returns:
        str: "Title: <title>, Body: <minified body>" when a <body> tag is
        present, otherwise "Title: <title>, Body: No body content found".
        The title part is empty when the document has no <title> tag.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Title extraction: fall back to an empty string when there is no <title>.
    title_tag = soup.find('title')
    title = title_tag.get_text() if title_tag is not None else ""

    # Script tag removal: a plain loop, because extract() is called only for
    # its side effect of detaching the tag from the tree.
    for script in soup.find_all('script'):
        script.extract()

    # Body extraction: bail out early when the document has no <body>.
    body_content = soup.find('body')
    if body_content is None:
        return f"Title: {title}, Body: No body content found"

    # Minify only the serialized <body> element, not the whole document.
    minimized_body = minify(str(body_content))
    return f"Title: {title}, Body: {minimized_body}"
0 commit comments