|
19 | 19 | from utils import holiday_replacements |
20 | 20 | import holidays |
21 | 21 | import pytz |
| 22 | +from bs4 import BeautifulSoup |
22 | 23 |
|
23 | 24 | from telegram import Update |
24 | 25 | from telegram.ext import CallbackContext |
@@ -607,13 +608,14 @@ async def handle_message(bot, update: Update, context: CallbackContext, logger) |
607 | 608 | # Ensure the bot has a substantive response to send |
608 | 609 | if bot_reply: |
609 | 610 | # Function to clean unsupported tags |
610 | | - def sanitize_html(content): |
611 | | - # Remove unsupported HTML tags |
612 | | - for tag in ['<pre>', '</pre>', '<br>', '<br/>', '</br>', '<div>', '</div>', '<span>', '</span>', '<p>', '</p>']: |
613 | | - content = content.replace(tag, '') |
614 | | - # Optionally: Replace line breaks with "\n" to preserve formatting |
615 | | - content = content.replace('<br>', '\n').replace('<br/>', '\n') |
616 | | - return content |
| 611 | + # # // old method |
| 612 | + # def sanitize_html(content): |
| 613 | + # # Remove unsupported HTML tags |
| 614 | + # for tag in ['<pre>', '</pre>', '<br>', '<br/>', '</br>', '<div>', '</div>', '<span>', '</span>', '<p>', '</p>']: |
| 615 | + # content = content.replace(tag, '') |
| 616 | + # # Optionally: Replace line breaks with "\n" to preserve formatting |
| 617 | + # content = content.replace('<br>', '\n').replace('<br/>', '\n') |
| 618 | + # return content |
617 | 619 |
|
618 | 620 | # Convert markdown to HTML |
619 | 621 | escaped_reply = markdown_to_html(bot_reply) |
@@ -679,13 +681,13 @@ def sanitize_html(content): |
679 | 681 | # Ensure the bot has a substantive response to send |
680 | 682 | if bot_reply: |
681 | 683 | # Function to clean unsupported tags |
682 | | - def sanitize_html(content): |
683 | | - # Remove unsupported HTML tags |
684 | | - for tag in ['<pre>', '</pre>', '<br>', '<br/>', '</br>', '<div>', '</div>', '<span>', '</span>', '<p>', '</p>']: |
685 | | - content = content.replace(tag, '') |
686 | | - # Optionally: Replace line breaks with "\n" to preserve formatting |
687 | | - content = content.replace('<br>', '\n').replace('<br/>', '\n') |
688 | | - return content |
| 684 | + # def sanitize_html(content): |
| 685 | + # # Remove unsupported HTML tags |
| 686 | + # for tag in ['<pre>', '</pre>', '<br>', '<br/>', '</br>', '<div>', '</div>', '<span>', '</span>', '<p>', '</p>']: |
| 687 | + # content = content.replace(tag, '') |
| 688 | + # # Optionally: Replace line breaks with "\n" to preserve formatting |
| 689 | + # content = content.replace('<br>', '\n').replace('<br/>', '\n') |
| 690 | + # return content |
689 | 691 |
|
690 | 692 | # Convert markdown to HTML |
691 | 693 | escaped_reply = markdown_to_html(bot_reply) |
@@ -1019,6 +1021,8 @@ def sanitize_html(content): |
1019 | 1021 | # parse_mode=ParseMode.HTML |
1020 | 1022 | # ) |
1021 | 1023 |
|
| 1024 | + escaped_reply = sanitize_html(escaped_reply) |
| 1025 | + |
1022 | 1026 | message_parts = split_message(escaped_reply) |
1023 | 1027 |
|
1024 | 1028 | for part in message_parts: |
@@ -1280,6 +1284,19 @@ def split_message(message, max_length=4000): |
1280 | 1284 |
|
1281 | 1285 | return message_parts |
1282 | 1286 |
|
# Tags accepted by Telegram's HTML parse mode (Bot API "HTML style").
# <span> is handled separately: it is only kept with class="tg-spoiler".
# NOTE(review): <blockquote> and <tg-emoji> were added in later Bot API
# releases -- confirm the Bot API server in use accepts them.
_TELEGRAM_ALLOWED_TAGS = frozenset({
    'b', 'strong', 'i', 'em', 'u', 'ins', 's', 'strike', 'del',
    'a', 'code', 'pre', 'tg-spoiler', 'tg-emoji', 'blockquote',
})


def sanitize_html(content):
    """Strip HTML tags that Telegram's HTML parse mode does not accept.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend and
    walked tag by tag:

    * tags in ``_TELEGRAM_ALLOWED_TAGS`` (and ``<span class="tg-spoiler">``)
      are kept untouched;
    * ``<br>`` is replaced by a newline so line breaks survive;
    * every other tag is unwrapped, i.e. the tag is dropped but its
      children and text stay in place.

    Args:
        content: HTML string to clean. Empty or ``None`` yields ``""``.

    Returns:
        The cleaned markup re-serialized from the parsed tree as a ``str``.
    """
    if not content:
        return ''

    soup = BeautifulSoup(content, 'html.parser')

    # find_all() returns a concrete result list, so the tree can be
    # modified safely while looping over it.
    for tag in soup.find_all(True):
        tag_name = tag.name

        if tag_name in _TELEGRAM_ALLOWED_TAGS:
            continue

        if tag_name == 'br':
            # Unwrapping <br> would silently join the two lines around it.
            tag.replace_with('\n')
            continue

        if tag_name == 'span':
            css_classes = tag.get('class') or []
            if isinstance(css_classes, str):
                css_classes = css_classes.split()
            if 'tg-spoiler' in css_classes:
                continue

        # Unsupported tag: remove the markup, keep the contents.
        tag.unwrap()

    # Serializing the parsed tree (rather than patching the input string)
    # is what yields balanced output; there is no separate nesting repair.
    return str(soup)
| 1299 | + |
1283 | 1300 | # # // (old request type) |
1284 | 1301 | # async def make_api_request(bot, chat_history, timeout=30): |
1285 | 1302 | # # Prepare the payload for the API request with updated chat history |
|
0 commit comments