From 2264905f0cc2c05363cc43d96df6a7a1e4d96f09 Mon Sep 17 00:00:00 2001 From: mahtoid Date: Sun, 22 Mar 2026 11:50:08 +0000 Subject: [PATCH 1/8] AST-based markdown parsing. Bug Report template update. CSS update around embeds and buttons. Test and Lint workflows. Minor improvements --- .github/ISSUE_TEMPLATE/bug-report.yml | 8 + .github/workflows/codeql-analysis.yml | 66 -- .github/workflows/lint.yml | 17 + .github/workflows/test.yml | 35 + .gitignore | 4 +- chat_exporter/__init__.py | 11 +- chat_exporter/chat_exporter.py | 36 +- chat_exporter/construct/assets/__init__.py | 4 +- chat_exporter/construct/assets/attachment.py | 107 ++- chat_exporter/construct/assets/component.py | 326 +++++---- chat_exporter/construct/assets/embed.py | 207 ++++-- chat_exporter/construct/assets/reaction.py | 30 +- chat_exporter/construct/attachment_handler.py | 178 ++--- chat_exporter/construct/message.py | 391 ++++++----- chat_exporter/construct/transcript.py | 171 +++-- chat_exporter/ext/cache.py | 9 +- chat_exporter/ext/discord_import.py | 2 +- chat_exporter/ext/discord_utils.py | 32 +- chat_exporter/ext/emoji_convert.py | 6 +- chat_exporter/ext/html_generator.py | 35 +- chat_exporter/html/base.html | 50 +- chat_exporter/html/embed/field-inline.html | 2 +- chat_exporter/html/embed/field.html | 2 +- chat_exporter/parse/ast.py | 651 ++++++++++++++++++ chat_exporter/parse/markdown.py | 411 ++--------- chat_exporter/parse/mention.py | 233 ------- pyproject.toml | 32 +- tests/test_ast.py | 156 +++++ tests/test_export.py | 449 ++++++++++++ 29 files changed, 2354 insertions(+), 1307 deletions(-) delete mode 100644 .github/workflows/codeql-analysis.yml create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/test.yml create mode 100644 chat_exporter/parse/ast.py delete mode 100644 chat_exporter/parse/mention.py create mode 100644 tests/test_ast.py create mode 100644 tests/test_export.py diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml 
b/.github/ISSUE_TEMPLATE/bug-report.yml index fa23fbf..82e0ebb 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -37,6 +37,14 @@ body: validations: required: true +- type: textarea + attributes: + label: Reproduction + description: How to reproduce the bug. + placeholder: Either provide a minimal reproducible example or a set of steps to reproduce the bug. + validations: + required: true + - type: textarea attributes: label: Bug Traceback diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index 201c1e5..0000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: "CodeQL" - -on: - push: - branches: [master] - pull_request: - # The branches below must be a subset of the branches above - branches: [master] - schedule: - - cron: '0 8 * * 4' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - # Override automatic language detection by changing the below list - # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python'] - language: ['python'] - # Learn more... - # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - with: - # We must fetch at least the immediate parents so that if this is - # a pull request then we can checkout the head. - fetch-depth: 2 - - # If this run was triggered by a pull request event, then checkout - # the head of the pull request instead of the merge commit. - - run: git checkout HEAD^2 - if: ${{ github.event_name == 'pull_request' }} - - # Initializes the CodeQL tools for scanning. 
- - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 - - # â„šī¸ Command-line programs to run using the OS shell. - # 📚 https://git.io/JvXDl - - # âœī¸ If the Autobuild fails above, remove it and uncomment the following three lines - # and modify them (or add more) to build your code if your project - # uses a compiled language - - #- run: | - # make bootstrap - # make release - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..c283f90 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,17 @@ +name: lint + +on: + push: + branches: [ "**" ] + pull_request: + branches: [ "**" ] + +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/ruff-action@v1 + with: + args: "check --select S,E,F,I --exit-zero" + diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..8571bd6 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,35 @@ +name: test + +on: + push: + branches: [ "**" ] + pull_request: + branches: [ "**" ] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip 
install --upgrade pip + pip install -e ".[dev]" + + - name: Run AST unit tests + run: python -m unittest discover -s tests -p "test_*.py" -v + + - name: Upload export artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: export-artifacts + path: tests/artifacts/ + if-no-files-found: ignore diff --git a/.gitignore b/.gitignore index 4d11da2..8bf9cf5 100644 --- a/.gitignore +++ b/.gitignore @@ -168,8 +168,8 @@ chat_exporter/config.py *.fs* *.sqlite -.idea/* -.idea\* +# Tests +tests/artifacts # macOS .DS_Store \ No newline at end of file diff --git a/chat_exporter/__init__.py b/chat_exporter/__init__.py index f178d44..a85b419 100644 --- a/chat_exporter/__init__.py +++ b/chat_exporter/__init__.py @@ -1,13 +1,14 @@ from chat_exporter.chat_exporter import ( - export, - raw_export, - quick_export, AttachmentHandler, + AttachmentToDiscordChannelHandler, AttachmentToLocalFileHostHandler, AttachmentToWebhookHandler, - AttachmentToDiscordChannelHandler) + export, + quick_export, + raw_export, +) -__version__ = "3.0.1" +__version__ = "3.1.0" __all__ = ( export, diff --git a/chat_exporter/chat_exporter.py b/chat_exporter/chat_exporter.py index e0eb17b..08c5ef5 100644 --- a/chat_exporter/chat_exporter.py +++ b/chat_exporter/chat_exporter.py @@ -2,16 +2,31 @@ import io from typing import List, Optional +from chat_exporter.construct.attachment_handler import ( + AttachmentHandler, + AttachmentToDiscordChannelHandler, + AttachmentToLocalFileHostHandler, + AttachmentToWebhookHandler, +) from chat_exporter.construct.transcript import Transcript from chat_exporter.ext.discord_import import discord -from chat_exporter.construct.attachment_handler import AttachmentHandler, AttachmentToLocalFileHostHandler, AttachmentToDiscordChannelHandler, AttachmentToWebhookHandler + +__all__ = [ + "quick_export", + "export", + "raw_export", + "AttachmentHandler", + "AttachmentToLocalFileHostHandler", + "AttachmentToDiscordChannelHandler", + "AttachmentToWebhookHandler", +] async 
def quick_export( channel: discord.TextChannel, guild: Optional[discord.Guild] = None, bot: Optional[discord.Client] = None, - raise_exceptions: bool = False + raise_exceptions: bool = False, ): """ Create a quick export of your Discord channel. @@ -39,16 +54,15 @@ async def quick_export( support_dev=True, bot=bot, attachment_handler=None, - raise_exceptions=raise_exceptions - ).export() - ).html + raise_exceptions=raise_exceptions, + ).export() + ).html if not transcript: return transcript_embed = discord.Embed( - description=f"**Transcript Name:** transcript-{channel.name}\n\n", - colour=discord.Colour.blurple() + description=f"**Transcript Name:** transcript-{channel.name}\n\n", colour=discord.Colour.blurple() ) transcript_file = discord.File(io.BytesIO(transcript.encode()), filename=f"transcript-{channel.name}.html") @@ -67,7 +81,7 @@ async def export( after: Optional[datetime.datetime] = None, support_dev: Optional[bool] = True, attachment_handler: Optional[AttachmentHandler] = None, - raise_exceptions: bool = False + raise_exceptions: bool = False, ): """ Create a customised transcript of your Discord channel. 
@@ -101,7 +115,7 @@ async def export( support_dev=support_dev, bot=bot, attachment_handler=attachment_handler, - raise_exceptions=raise_exceptions + raise_exceptions=raise_exceptions, ).export() ).html @@ -116,7 +130,7 @@ async def raw_export( fancy_times: Optional[bool] = True, support_dev: Optional[bool] = True, attachment_handler: Optional[AttachmentHandler] = None, - raise_exceptions: bool = False + raise_exceptions: bool = False, ): """ Create a customised transcript with your own captured Discord messages @@ -148,6 +162,6 @@ async def raw_export( support_dev=support_dev, bot=bot, attachment_handler=attachment_handler, - raise_exceptions=raise_exceptions + raise_exceptions=raise_exceptions, ).export() ).html diff --git a/chat_exporter/construct/assets/__init__.py b/chat_exporter/construct/assets/__init__.py index 3a930ee..d107085 100644 --- a/chat_exporter/construct/assets/__init__.py +++ b/chat_exporter/construct/assets/__init__.py @@ -1,7 +1,7 @@ -from .embed import Embed -from .reaction import Reaction from .attachment import Attachment from .component import Component +from .embed import Embed +from .reaction import Reaction __all__ = ( Embed, diff --git a/chat_exporter/construct/assets/attachment.py b/chat_exporter/construct/assets/attachment.py index 80ffea9..0ac23b3 100644 --- a/chat_exporter/construct/assets/attachment.py +++ b/chat_exporter/construct/assets/attachment.py @@ -2,12 +2,12 @@ from chat_exporter.ext.discord_utils import DiscordUtils from chat_exporter.ext.html_generator import ( + PARSE_MODE_NONE, + audio_attachment, fill_out, img_attachment, msg_attachment, - audio_attachment, video_attachment, - PARSE_MODE_NONE, ) @@ -45,39 +45,51 @@ async def build_attachment(self): self._mark_spoiler() async def image(self): - self.attachments = await fill_out(self.guild, img_attachment, [ - ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE), - ("ATTACH_URL_THUMB", self.attachments.proxy_url, PARSE_MODE_NONE) - ]) + self.attachments = await 
fill_out( + self.guild, + img_attachment, + [ + ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE), + ("ATTACH_URL_THUMB", self.attachments.proxy_url, PARSE_MODE_NONE), + ], + ) async def video(self): - self.attachments = await fill_out(self.guild, video_attachment, [ - ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE) - ]) + self.attachments = await fill_out( + self.guild, video_attachment, [("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE)] + ) async def audio(self): file_icon = DiscordUtils.file_attachment_audio file_size = self.get_file_size(self.attachments.size) - self.attachments = await fill_out(self.guild, audio_attachment, [ - ("ATTACH_ICON", file_icon, PARSE_MODE_NONE), - ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE), - ("ATTACH_BYTES", str(file_size), PARSE_MODE_NONE), - ("ATTACH_AUDIO", self.attachments.proxy_url, PARSE_MODE_NONE), - ("ATTACH_FILE", str(self.attachments.filename), PARSE_MODE_NONE) - ]) + self.attachments = await fill_out( + self.guild, + audio_attachment, + [ + ("ATTACH_ICON", file_icon, PARSE_MODE_NONE), + ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE), + ("ATTACH_BYTES", str(file_size), PARSE_MODE_NONE), + ("ATTACH_AUDIO", self.attachments.proxy_url, PARSE_MODE_NONE), + ("ATTACH_FILE", str(self.attachments.filename), PARSE_MODE_NONE), + ], + ) async def file(self): file_icon = await self.get_file_icon() file_size = self.get_file_size(self.attachments.size) - self.attachments = await fill_out(self.guild, msg_attachment, [ - ("ATTACH_ICON", file_icon, PARSE_MODE_NONE), - ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE), - ("ATTACH_BYTES", str(file_size), PARSE_MODE_NONE), - ("ATTACH_FILE", str(self.attachments.filename), PARSE_MODE_NONE) - ]) + self.attachments = await fill_out( + self.guild, + msg_attachment, + [ + ("ATTACH_ICON", file_icon, PARSE_MODE_NONE), + ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE), + ("ATTACH_BYTES", str(file_size), 
PARSE_MODE_NONE), + ("ATTACH_FILE", str(self.attachments.filename), PARSE_MODE_NONE), + ], + ) @staticmethod def get_file_size(file_size): @@ -93,7 +105,7 @@ async def get_file_icon(self) -> str: return self.resolve_file_icon( name=str(getattr(self.attachments, "filename", "") or ""), content_type=str(getattr(self.attachments, "content_type", "") or ""), - url=str(getattr(self.attachments, "proxy_url", "") or "") + url=str(getattr(self.attachments, "proxy_url", "") or ""), ) @staticmethod @@ -102,12 +114,45 @@ def resolve_file_icon(name: str = "", content_type: str = "", url: str = "") -> webcode_types = "html", "htm", "css", "rss", "xhtml", "xml" code_types = "py", "cgi", "pl", "gadget", "jar", "msi", "wsf", "bat", "php", "js" document_types = ( - "txt", "doc", "docx", "rtf", "xls", "xlsx", "ppt", "pptx", "odt", "odp", "ods", "odg", "odf", "swx", - "sxi", "sxc", "sxd", "stw" + "txt", + "doc", + "docx", + "rtf", + "xls", + "xlsx", + "ppt", + "pptx", + "odt", + "odp", + "ods", + "odg", + "odf", + "swx", + "sxi", + "sxc", + "sxd", + "stw", ) archive_types = ( - "br", "rpm", "dcm", "epub", "zip", "tar", "rar", "gz", "bz2", "7x", "7z", "deb", "ar", "z", "lzo", "lz", - "lz4", "arj", "pkg" + "br", + "rpm", + "dcm", + "epub", + "zip", + "tar", + "rar", + "gz", + "bz2", + "7x", + "7z", + "deb", + "ar", + "z", + "lzo", + "lz", + "lz4", + "arj", + "pkg", ) content_type = (content_type or "").lower() @@ -154,7 +199,8 @@ def _is_spoiler(self) -> bool: if callable(spoiler_attr): try: return bool(spoiler_attr()) - except Exception: + except Exception: # noqa: S110 + # In case spoiler_attr() is a property masquerading as a callable that fails pass if spoiler_attr is not None: return bool(spoiler_attr) @@ -163,7 +209,8 @@ def _is_spoiler(self) -> bool: if callable(is_spoiler_method): try: return bool(is_spoiler_method()) - except Exception: + except Exception: # noqa: S110 + # If the method check fails, assume it's not a spoiler return False return False @@ -174,7 +221,7 @@ def 
_mark_spoiler(self): return replacements = ( - ('
', '
'), + ("
", '
'), ('class="chatlog__attachment"', 'class="chatlog__attachment chatlog__attachment-spoiler"'), ) diff --git a/chat_exporter/construct/assets/component.py b/chat_exporter/construct/assets/component.py index 137d46f..c425281 100644 --- a/chat_exporter/construct/assets/component.py +++ b/chat_exporter/construct/assets/component.py @@ -4,9 +4,9 @@ from chat_exporter.ext.discord_import import discord from chat_exporter.ext.discord_utils import DiscordUtils from chat_exporter.ext.html_generator import ( + PARSE_MODE_EMBED, PARSE_MODE_EMOJI, PARSE_MODE_MARKDOWN, - PARSE_MODE_EMBED, PARSE_MODE_NONE, component_button, component_container, @@ -136,8 +136,8 @@ def _get_file_icon(self, file_name: str, content_type: str = "", media_url: str async def build_component(self, c): # Check for component type attribute - component_type = getattr(c, 'type', None) - + component_type = getattr(c, "type", None) + # Handle legacy components (v1) if isinstance(c, discord.Button): return await self.build_button(c) @@ -145,13 +145,13 @@ async def build_component(self, c): menu_html = await self.build_menu(c) Component.menu_div_id += 1 return menu_html - + # Handle components v2 based on type if component_type is None: return "" - - type_value = component_type.value if hasattr(component_type, 'value') else component_type - + + type_value = component_type.value if hasattr(component_type, "value") else component_type + # ActionRow (type 1) - contains buttons/selects if type_value == 1: return await self.build_action_row(c) @@ -184,23 +184,23 @@ async def build_component(self, c): # Container (type 17) elif type_value == 17: return await self.build_container(c) - + return "" async def build_action_row(self, c): """Build an action row containing buttons or select menus""" result = "" items_html = "" - - children = getattr(c, 'children', []) or getattr(c, 'components', []) + + children = getattr(c, "children", []) or getattr(c, "components", []) for child in children: child_html = await 
self.build_component(child) if child_html: items_html += child_html - + if items_html: result = f'
{items_html}
' - + return result async def build_button(self, c): @@ -231,20 +231,26 @@ async def build_button(self, c): raw_style_str = str(raw_style) style_key = raw_style_str.split(".")[-1].lower() style = self.styles.get(style_key, "#4F545C") - button_variant = "chatlog__component-button--link" if style_key == "link" else "chatlog__component-button--filled" + button_variant = ( + "chatlog__component-button--link" if style_key == "link" else "chatlog__component-button--filled" + ) emoji = self._stringify_emoji(self._get_attr(c, "emoji", None)) - return await fill_out(self.guild, component_button, [ - ("DISABLED", "chatlog__component-disabled" if disabled else "", PARSE_MODE_NONE), - ("URL", url, PARSE_MODE_NONE), - ("BUTTON_VARIANT", button_variant, PARSE_MODE_NONE), - ("ARIA_DISABLED", "true" if disabled else "false", PARSE_MODE_NONE), - ("LABEL", label, PARSE_MODE_MARKDOWN), - ("EMOJI", emoji, PARSE_MODE_EMOJI), - ("ICON", icon, PARSE_MODE_NONE), - ("TARGET", target, PARSE_MODE_NONE), - ("STYLE", style, PARSE_MODE_NONE) - ]) + return await fill_out( + self.guild, + component_button, + [ + ("DISABLED", "chatlog__component-disabled" if disabled else "", PARSE_MODE_NONE), + ("URL", url, PARSE_MODE_NONE), + ("BUTTON_VARIANT", button_variant, PARSE_MODE_NONE), + ("ARIA_DISABLED", "true" if disabled else "false", PARSE_MODE_NONE), + ("LABEL", label, PARSE_MODE_MARKDOWN), + ("EMOJI", emoji, PARSE_MODE_EMOJI), + ("ICON", icon, PARSE_MODE_NONE), + ("TARGET", target, PARSE_MODE_NONE), + ("STYLE", style, PARSE_MODE_NONE), + ], + ) async def build_menu(self, c): placeholder = self._get_attr(c, "placeholder", "") or "" @@ -268,14 +274,18 @@ async def build_menu(self, c): if not disabled: content = await self.build_menu_options(options) - menu_html = await fill_out(self.guild, component_menu, [ - ("DISABLED", "chatlog__component-disabled" if disabled else "", PARSE_MODE_NONE), - ("ID", str(self.menu_div_id), PARSE_MODE_NONE), - ("PLACEHOLDER", str(selected_label), PARSE_MODE_MARKDOWN), 
- ("PLACEHOLDER_TITLE", str(placeholder), PARSE_MODE_MARKDOWN), - ("CONTENT", str(content), PARSE_MODE_NONE), - ("ICON", DiscordUtils.interaction_dropdown_icon, PARSE_MODE_NONE), - ]) + menu_html = await fill_out( + self.guild, + component_menu, + [ + ("DISABLED", "chatlog__component-disabled" if disabled else "", PARSE_MODE_NONE), + ("ID", str(self.menu_div_id), PARSE_MODE_NONE), + ("PLACEHOLDER", str(selected_label), PARSE_MODE_MARKDOWN), + ("PLACEHOLDER_TITLE", str(placeholder), PARSE_MODE_MARKDOWN), + ("CONTENT", str(content), PARSE_MODE_NONE), + ("ICON", DiscordUtils.interaction_dropdown_icon, PARSE_MODE_NONE), + ], + ) return menu_html async def build_menu_options(self, options): @@ -289,20 +299,32 @@ async def build_menu_options(self, options): check_mark = "✓" if is_default else "" if option_emoji: - content.append(await fill_out(self.guild, component_menu_options_emoji, [ - ("EMOJI", str(option_emoji), PARSE_MODE_EMOJI), - ("TITLE", str(label), PARSE_MODE_MARKDOWN), - ("DESCRIPTION", str(description) if description else "", PARSE_MODE_MARKDOWN), - ("DEFAULT_CLASS", default_class, PARSE_MODE_NONE), - ("CHECK", check_mark, PARSE_MODE_NONE), - ])) + content.append( + await fill_out( + self.guild, + component_menu_options_emoji, + [ + ("EMOJI", str(option_emoji), PARSE_MODE_EMOJI), + ("TITLE", str(label), PARSE_MODE_MARKDOWN), + ("DESCRIPTION", str(description) if description else "", PARSE_MODE_MARKDOWN), + ("DEFAULT_CLASS", default_class, PARSE_MODE_NONE), + ("CHECK", check_mark, PARSE_MODE_NONE), + ], + ) + ) else: - content.append(await fill_out(self.guild, component_menu_options, [ - ("TITLE", str(label), PARSE_MODE_MARKDOWN), - ("DESCRIPTION", str(description) if description else "", PARSE_MODE_MARKDOWN), - ("DEFAULT_CLASS", default_class, PARSE_MODE_NONE), - ("CHECK", check_mark, PARSE_MODE_NONE), - ])) + content.append( + await fill_out( + self.guild, + component_menu_options, + [ + ("TITLE", str(label), PARSE_MODE_MARKDOWN), + ("DESCRIPTION", 
str(description) if description else "", PARSE_MODE_MARKDOWN), + ("DEFAULT_CLASS", default_class, PARSE_MODE_NONE), + ("CHECK", check_mark, PARSE_MODE_NONE), + ], + ) + ) if content: content = f'' @@ -311,17 +333,17 @@ async def build_menu_options(self, options): async def build_container(self, c): """Build a container component (like an embed)""" - accent_color = getattr(c, 'accent_color', None) or getattr(c, 'accent_colour', None) - spoiler = getattr(c, 'spoiler', False) - components = getattr(c, 'components', []) or getattr(c, 'children', []) - + accent_color = getattr(c, "accent_color", None) or getattr(c, "accent_colour", None) + spoiler = getattr(c, "spoiler", False) + components = getattr(c, "components", []) or getattr(c, "children", []) + # Build nested components content_html = "" for child in components: child_html = await self.build_component(child) if child_html: content_html += child_html - + # Handle accent color accent_style = "" accent_class = "" @@ -340,60 +362,76 @@ async def build_container(self, c): else: color_value = int(accent_color) color_hex = f"#{color_value:06x}" - accent_style = f'--component-accent:{color_hex};' + accent_style = f"--component-accent:{color_hex};" accent_class = "chatlog__component-container--accent" except (TypeError, ValueError): accent_style = "" - + spoiler_class = "chatlog__component-spoiler" if spoiler else "" spoiler_label = '
SPOILER
' if spoiler else "" - - return await fill_out(self.guild, component_container, [ - ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), - ("SPOILER_TAG", spoiler_label, PARSE_MODE_NONE), - ("ACCENT_CLASS", accent_class, PARSE_MODE_NONE), - ("ACCENT_COLOR_STYLE", accent_style, PARSE_MODE_NONE), - ("CONTENT", content_html, PARSE_MODE_NONE), - ]) + + return await fill_out( + self.guild, + component_container, + [ + ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), + ("SPOILER_TAG", spoiler_label, PARSE_MODE_NONE), + ("ACCENT_CLASS", accent_class, PARSE_MODE_NONE), + ("ACCENT_COLOR_STYLE", accent_style, PARSE_MODE_NONE), + ("CONTENT", content_html, PARSE_MODE_NONE), + ], + ) async def build_section(self, c): """Build a section component with content and accessory""" - components = getattr(c, 'components', []) or getattr(c, 'children', []) - accessory = getattr(c, 'accessory', None) + components = getattr(c, "components", []) or getattr(c, "children", []) + accessory = getattr(c, "accessory", None) has_accessory = accessory is not None - + # Build content (text displays) content_html = "" for child in components: child_html = await self.build_component(child) if child_html: content_html += child_html - + # Build accessory (thumbnail or button) accessory_html = "" if accessory: accessory_html = await self.build_component(accessory) - - return await fill_out(self.guild, component_section, [ - ("CONTENT", content_html, PARSE_MODE_NONE), - ("ACCESSORY", accessory_html, PARSE_MODE_NONE), - ("HAS_ACCESSORY_CLASS", "chatlog__component-section--has-accessory" if has_accessory else "", PARSE_MODE_NONE), - ]) + + return await fill_out( + self.guild, + component_section, + [ + ("CONTENT", content_html, PARSE_MODE_NONE), + ("ACCESSORY", accessory_html, PARSE_MODE_NONE), + ( + "HAS_ACCESSORY_CLASS", + "chatlog__component-section--has-accessory" if has_accessory else "", + PARSE_MODE_NONE, + ), + ], + ) async def build_text_display(self, c): """Build a text display component""" - 
content = getattr(c, 'content', '') - - return await fill_out(self.guild, component_text_display, [ - ("CONTENT", str(content), PARSE_MODE_EMBED), - ]) + content = getattr(c, "content", "") + + return await fill_out( + self.guild, + component_text_display, + [ + ("CONTENT", str(content), PARSE_MODE_EMBED), + ], + ) async def build_thumbnail(self, c): """Build a thumbnail component""" - media = self._get_attr(c, 'media', None) - description = self._get_attr(c, 'description', None) - spoiler = bool(self._get_attr(c, 'spoiler', False)) - + media = self._get_attr(c, "media", None) + description = self._get_attr(c, "description", None) + spoiler = bool(self._get_attr(c, "spoiler", False)) + url = self._get_media_url(media) if not url: return "" @@ -408,7 +446,7 @@ async def build_thumbnail(self, c): spoiler_label = "" title_text = description_text alt_text = description_text or file_name - + if spoiler: spoiler_label = '
SPOILER
' title_text = "Spoiler" @@ -416,25 +454,29 @@ async def build_thumbnail(self, c): description_overlay = "" elif description: description_overlay = f'
{description}
' - - return await fill_out(self.guild, component_thumbnail, [ - ("URL", str(url), PARSE_MODE_NONE), - ("TITLE", title_text, PARSE_MODE_MARKDOWN), - ("ALT", alt_text, PARSE_MODE_MARKDOWN), - ("DESCRIPTION", description_text, PARSE_MODE_MARKDOWN), - ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), - ("SPOILER_TAG", spoiler_label, PARSE_MODE_NONE), - ("DESCRIPTION_OVERLAY", description_overlay, PARSE_MODE_NONE), - ]) + + return await fill_out( + self.guild, + component_thumbnail, + [ + ("URL", str(url), PARSE_MODE_NONE), + ("TITLE", title_text, PARSE_MODE_MARKDOWN), + ("ALT", alt_text, PARSE_MODE_MARKDOWN), + ("DESCRIPTION", description_text, PARSE_MODE_MARKDOWN), + ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), + ("SPOILER_TAG", spoiler_label, PARSE_MODE_NONE), + ("DESCRIPTION_OVERLAY", description_overlay, PARSE_MODE_NONE), + ], + ) async def build_media_gallery(self, c): """Build a media gallery component""" - items = getattr(c, 'items', []) or getattr(c, 'components', []) or getattr(c, 'children', []) - + items = getattr(c, "items", []) or getattr(c, "components", []) or getattr(c, "children", []) + items_html = "" for item in items: items_html += await self.build_media_gallery_item(item) - + # Determine gallery class based on item count item_count = len(items) gallery_class = "" @@ -446,18 +488,22 @@ async def build_media_gallery(self, c): gallery_class = "chatlog__media-gallery-triple" elif item_count >= 4: gallery_class = "chatlog__media-gallery-grid" - - return await fill_out(self.guild, component_media_gallery, [ - ("ITEMS", items_html, PARSE_MODE_NONE), - ("GALLERY_CLASS", gallery_class, PARSE_MODE_NONE), - ]) + + return await fill_out( + self.guild, + component_media_gallery, + [ + ("ITEMS", items_html, PARSE_MODE_NONE), + ("GALLERY_CLASS", gallery_class, PARSE_MODE_NONE), + ], + ) async def build_media_gallery_item(self, item): """Build a single media gallery item""" - media = self._get_attr(item, 'media', None) - description = self._get_attr(item, 
'description', None) - spoiler = bool(self._get_attr(item, 'spoiler', False)) - + media = self._get_attr(item, "media", None) + description = self._get_attr(item, "description", None) + spoiler = bool(self._get_attr(item, "spoiler", False)) + url = self._get_media_url(media) if not url: return "" @@ -473,45 +519,53 @@ async def build_media_gallery_item(self, item): spoiler_label = "" title_text = description_text alt_text = description_text or file_name - + if spoiler: spoiler_label = '
SPOILER
' title_text = "Spoiler" alt_text = "Spoiler" elif description: description_overlay = f'
{description}
' - - return await fill_out(self.guild, component_media_gallery_item, [ - ("URL", str(url), PARSE_MODE_NONE), - ("TITLE", title_text, PARSE_MODE_MARKDOWN), - ("ALT", alt_text, PARSE_MODE_MARKDOWN), - ("DESCRIPTION", description_text, PARSE_MODE_MARKDOWN), - ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), - ("SPOILER_TAG", spoiler_label, PARSE_MODE_NONE), - ("DESCRIPTION_OVERLAY", description_overlay, PARSE_MODE_NONE), - ]) + + return await fill_out( + self.guild, + component_media_gallery_item, + [ + ("URL", str(url), PARSE_MODE_NONE), + ("TITLE", title_text, PARSE_MODE_MARKDOWN), + ("ALT", alt_text, PARSE_MODE_MARKDOWN), + ("DESCRIPTION", description_text, PARSE_MODE_MARKDOWN), + ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), + ("SPOILER_TAG", spoiler_label, PARSE_MODE_NONE), + ("DESCRIPTION_OVERLAY", description_overlay, PARSE_MODE_NONE), + ], + ) async def build_separator(self, c): """Build a separator component""" - divider = self._get_attr(c, 'divider', True) - spacing = self._get_attr(c, 'spacing', 1) + divider = self._get_attr(c, "divider", True) + spacing = self._get_attr(c, "spacing", 1) if not isinstance(spacing, int) and hasattr(spacing, "value"): spacing = spacing.value - + # Spacing: 1 = SMALL, 2 = LARGE spacing_class = "chatlog__separator-large" if spacing == 2 else "chatlog__separator-small" divider_html = '
' if divider else "" - - return await fill_out(self.guild, component_separator, [ - ("SPACING_CLASS", spacing_class, PARSE_MODE_NONE), - ("DIVIDER", divider_html, PARSE_MODE_NONE), - ]) + + return await fill_out( + self.guild, + component_separator, + [ + ("SPACING_CLASS", spacing_class, PARSE_MODE_NONE), + ("DIVIDER", divider_html, PARSE_MODE_NONE), + ], + ) async def build_file(self, c): """Build a file component""" - file = self._get_attr(c, 'file', None) or self._get_attr(c, 'media', None) - spoiler = bool(self._get_attr(c, 'spoiler', False)) - + file = self._get_attr(c, "file", None) or self._get_attr(c, "media", None) + spoiler = bool(self._get_attr(c, "spoiler", False)) + url = self._get_media_url(file) if not url: return "" @@ -536,16 +590,20 @@ async def build_file(self, c): if related_attachment and not content_type: content_type = getattr(related_attachment, "content_type", None) file_icon = self._get_file_icon(file_name, content_type, url) - + spoiler_class = "chatlog__component-spoiler" if spoiler else "" - - return await fill_out(self.guild, component_file, [ - ("FILE_NAME", str(file_name), PARSE_MODE_NONE), - ("FILE_URL", str(url), PARSE_MODE_NONE), - ("FILE_ICON", str(file_icon), PARSE_MODE_NONE), - ("FILE_SIZE", str(file_size), PARSE_MODE_NONE), - ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), - ]) + + return await fill_out( + self.guild, + component_file, + [ + ("FILE_NAME", str(file_name), PARSE_MODE_NONE), + ("FILE_URL", str(url), PARSE_MODE_NONE), + ("FILE_ICON", str(file_icon), PARSE_MODE_NONE), + ("FILE_SIZE", str(file_size), PARSE_MODE_NONE), + ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), + ], + ) async def flow(self): # Try to handle the component directly @@ -554,7 +612,7 @@ async def flow(self): self.components += component_html else: # Fallback to legacy flow for action rows with children - children = getattr(self.component, 'children', []) or getattr(self.component, 'components', []) + children = getattr(self.component, 
"children", []) or getattr(self.component, "components", []) for c in children: child_html = await self.build_component(c) if child_html: diff --git a/chat_exporter/construct/assets/embed.py b/chat_exporter/construct/assets/embed.py index 145fae7..45b3d1b 100644 --- a/chat_exporter/construct/assets/embed.py +++ b/chat_exporter/construct/assets/embed.py @@ -3,11 +3,14 @@ from pytz import timezone from chat_exporter.ext.discord_import import discord - from chat_exporter.ext.html_generator import ( - fill_out, + PARSE_MODE_EMBED, + PARSE_MODE_MARKDOWN, + PARSE_MODE_NONE, + PARSE_MODE_SPECIAL_EMBED, + embed_author, + embed_author_icon, embed_body, - embed_title, embed_description, embed_field, embed_field_inline, @@ -15,12 +18,8 @@ embed_footer_icon, embed_image, embed_thumbnail, - embed_author, - embed_author_icon, - PARSE_MODE_NONE, - PARSE_MODE_EMBED, - PARSE_MODE_MARKDOWN, - PARSE_MODE_SPECIAL_EMBED, + embed_title, + fill_out, ) modules_which_use_none = ["nextcord", "disnake"] @@ -69,7 +68,8 @@ async def flow(self): def build_colour(self): self.r, self.g, self.b = ( (self.embed.colour.r, self.embed.colour.g, self.embed.colour.b) - if self.embed.colour != self.check_against else (0x4A, 0x4A, 0x50) + if self.embed.colour != self.check_against + else (0x4A, 0x4A, 0x50) ) def _format_embed_timestamp(self) -> str: @@ -99,26 +99,25 @@ async def build_title(self): self.title = "" return - title_html = await fill_out(self.guild, "{{EMBED_TITLE}}", [ - ("EMBED_TITLE", raw_title, PARSE_MODE_MARKDOWN) - ]) + title_html = await fill_out(self.guild, "{{EMBED_TITLE}}", [("EMBED_TITLE", raw_title, PARSE_MODE_MARKDOWN)]) url_value = getattr(self.embed, "url", self.check_against) if url_value and url_value != self.check_against: safe_url = html.escape(str(url_value), quote=True) title_html = f'{title_html}' - self.title = await fill_out(self.guild, embed_title, [ - ("EMBED_TITLE", title_html, PARSE_MODE_NONE) - ]) + self.title = await fill_out(self.guild, embed_title, 
[("EMBED_TITLE", title_html, PARSE_MODE_NONE)]) async def build_description(self): - self.description = html.escape(self.embed.description) if self.embed.description != self.check_against else "" + escaped_description = ( + html.escape(self.embed.description) if self.embed.description != self.check_against else "" + ) + self.description = "" - if self.description: - self.description = await fill_out(self.guild, embed_description, [ - ("EMBED_DESC", self.embed.description, PARSE_MODE_EMBED) - ]) + if escaped_description: + self.description = await fill_out( + self.guild, embed_description, [("EMBED_DESC", escaped_description, PARSE_MODE_EMBED)] + ) async def build_fields(self): self.fields = "" @@ -127,34 +126,82 @@ async def build_fields(self): if not self.embed.fields: return + rows = [] + current_row = [] for field in self.embed.fields: - field.name = html.escape(field.name) - field.value = html.escape(field.value) - - if field.inline: - self.fields += await fill_out(self.guild, embed_field_inline, [ - ("FIELD_NAME", field.name, PARSE_MODE_SPECIAL_EMBED), - ("FIELD_VALUE", field.value, PARSE_MODE_EMBED) - ]) + if not getattr(field, "inline", False): + if current_row: + rows.append(current_row) + current_row = [] + rows.append([field]) else: - self.fields += await fill_out(self.guild, embed_field, [ - ("FIELD_NAME", field.name, PARSE_MODE_SPECIAL_EMBED), - ("FIELD_VALUE", field.value, PARSE_MODE_EMBED)]) + current_row.append(field) + if len(current_row) == 3: + rows.append(current_row) + current_row = [] + if current_row: + rows.append(current_row) + + for row in rows: + if len(row) == 1 and not getattr(row[0], "inline", False): + field = row[0] + field.name = html.escape(field.name) + field.value = html.escape(field.value) + self.fields += await fill_out( + self.guild, + embed_field, + [ + ("FIELD_NAME", field.name, PARSE_MODE_SPECIAL_EMBED), + ("FIELD_VALUE", field.value, PARSE_MODE_EMBED), + ("GRID_COLUMN", "1 / 13", PARSE_MODE_NONE), + ], + ) + else: + if 
len(row) == 3: + cols = ["1 / 5", "5 / 9", "9 / 13"] + elif len(row) == 2: + cols = ["1 / 7", "7 / 13"] + else: + cols = ["1 / 13"] + + for idx, field in enumerate(row): + field.name = html.escape(field.name) + field.value = html.escape(field.value) + self.fields += await fill_out( + self.guild, + embed_field_inline, + [ + ("FIELD_NAME", field.name, PARSE_MODE_SPECIAL_EMBED), + ("FIELD_VALUE", field.value, PARSE_MODE_EMBED), + ("GRID_COLUMN", cols[idx], PARSE_MODE_NONE), + ], + ) async def build_author(self): - self.author = html.escape(self.embed.author.name) if ( - self.embed.author and self.embed.author.name != self.check_against - ) else "" + self.author = ( + html.escape(self.embed.author.name) + if (self.embed.author and self.embed.author.name != self.check_against) + else "" + ) - self.author = f'{self.author}' \ - if ( - self.embed.author and self.embed.author.url != self.check_against - ) else self.author + self.author = ( + f'{self.author}' + if (self.embed.author and self.embed.author.url != self.check_against) + else self.author + ) - author_icon = await fill_out(self.guild, embed_author_icon, [ - ("AUTHOR", self.author, PARSE_MODE_NONE), - ("AUTHOR_ICON", self.embed.author.icon_url, PARSE_MODE_NONE) - ]) if self.embed.author and self.embed.author.icon_url != self.check_against else "" + author_icon = ( + await fill_out( + self.guild, + embed_author_icon, + [ + ("AUTHOR", self.author, PARSE_MODE_NONE), + ("AUTHOR_ICON", self.embed.author.icon_url, PARSE_MODE_NONE), + ], + ) + if self.embed.author and self.embed.author.icon_url != self.check_against + else "" + ) if author_icon == "" and self.author != "": self.author = await fill_out(self.guild, embed_author, [("AUTHOR", self.author, PARSE_MODE_NONE)]) @@ -162,23 +209,33 @@ async def build_author(self): self.author = author_icon async def build_image(self): - self.image = await fill_out(self.guild, embed_image, [ - ("EMBED_IMAGE", str(self.embed.image.proxy_url), PARSE_MODE_NONE) - ]) if 
self.embed.image and self.embed.image.url != self.check_against else "" + self.image = ( + await fill_out(self.guild, embed_image, [("EMBED_IMAGE", str(self.embed.image.proxy_url), PARSE_MODE_NONE)]) + if self.embed.image and self.embed.image.url != self.check_against + else "" + ) async def build_thumbnail(self): - self.thumbnail = await fill_out(self.guild, embed_thumbnail, [ - ("EMBED_THUMBNAIL", str(self.embed.thumbnail.url), PARSE_MODE_NONE)]) \ - if self.embed.thumbnail and self.embed.thumbnail.url != self.check_against else "" + self.thumbnail = ( + await fill_out( + self.guild, embed_thumbnail, [("EMBED_THUMBNAIL", str(self.embed.thumbnail.url), PARSE_MODE_NONE)] + ) + if self.embed.thumbnail and self.embed.thumbnail.url != self.check_against + else "" + ) async def build_footer(self): - footer_text = html.escape(self.embed.footer.text) if ( - self.embed.footer and self.embed.footer.text != self.check_against - ) else "" + footer_text = ( + html.escape(self.embed.footer.text) + if (self.embed.footer and self.embed.footer.text != self.check_against) + else "" + ) - footer_icon = self.embed.footer.icon_url if ( - self.embed.footer and self.embed.footer.icon_url != self.check_against - ) else None + footer_icon = ( + self.embed.footer.icon_url + if (self.embed.footer and self.embed.footer.icon_url != self.check_against) + else None + ) timestamp_text = self._format_embed_timestamp() if footer_text and timestamp_text: @@ -191,24 +248,28 @@ async def build_footer(self): return if footer_icon is not None: - self.footer = await fill_out(self.guild, embed_footer_icon, [ - ("EMBED_FOOTER", footer_text, PARSE_MODE_NONE), - ("EMBED_FOOTER_ICON", footer_icon, PARSE_MODE_NONE) - ]) + self.footer = await fill_out( + self.guild, + embed_footer_icon, + [("EMBED_FOOTER", footer_text, PARSE_MODE_NONE), ("EMBED_FOOTER_ICON", footer_icon, PARSE_MODE_NONE)], + ) else: - self.footer = await fill_out(self.guild, embed_footer, [ - ("EMBED_FOOTER", footer_text, PARSE_MODE_NONE)]) + 
self.footer = await fill_out(self.guild, embed_footer, [("EMBED_FOOTER", footer_text, PARSE_MODE_NONE)]) async def build_embed(self): - self.embed = await fill_out(self.guild, embed_body, [ - ("EMBED_R", str(self.r)), - ("EMBED_G", str(self.g)), - ("EMBED_B", str(self.b)), - ("EMBED_AUTHOR", self.author, PARSE_MODE_NONE), - ("EMBED_TITLE", self.title, PARSE_MODE_NONE), - ("EMBED_IMAGE", self.image, PARSE_MODE_NONE), - ("EMBED_THUMBNAIL", self.thumbnail, PARSE_MODE_NONE), - ("EMBED_DESC", self.description, PARSE_MODE_NONE), - ("EMBED_FIELDS", self.fields, PARSE_MODE_NONE), - ("EMBED_FOOTER", self.footer, PARSE_MODE_NONE), - ]) + self.embed = await fill_out( + self.guild, + embed_body, + [ + ("EMBED_R", str(self.r)), + ("EMBED_G", str(self.g)), + ("EMBED_B", str(self.b)), + ("EMBED_AUTHOR", self.author, PARSE_MODE_NONE), + ("EMBED_TITLE", self.title, PARSE_MODE_NONE), + ("EMBED_IMAGE", self.image, PARSE_MODE_NONE), + ("EMBED_THUMBNAIL", self.thumbnail, PARSE_MODE_NONE), + ("EMBED_DESC", self.description, PARSE_MODE_NONE), + ("EMBED_FIELDS", self.fields, PARSE_MODE_NONE), + ("EMBED_FOOTER", self.footer, PARSE_MODE_NONE), + ], + ) diff --git a/chat_exporter/construct/assets/reaction.py b/chat_exporter/construct/assets/reaction.py index d970b6d..fa5070b 100644 --- a/chat_exporter/construct/assets/reaction.py +++ b/chat_exporter/construct/assets/reaction.py @@ -1,7 +1,12 @@ import re from chat_exporter.ext.emoji_convert import convert_emoji -from chat_exporter.ext.html_generator import fill_out, emoji, custom_emoji, PARSE_MODE_NONE +from chat_exporter.ext.html_generator import ( + PARSE_MODE_NONE, + custom_emoji, + emoji, + fill_out, +) class Reaction: @@ -27,15 +32,20 @@ async def build_reaction(self): async def create_discord_reaction(self, emoji_type): pattern = r":.*:(\d*)" emoji_id = re.search(pattern, str(self.reaction.emoji)).group(1) - self.reaction = await fill_out(self.guild, custom_emoji, [ - ("EMOJI", str(emoji_id), PARSE_MODE_NONE), - ("EMOJI_COUNT", 
str(self.reaction.count), PARSE_MODE_NONE), - ("EMOJI_FILE", emoji_type, PARSE_MODE_NONE) - ]) + self.reaction = await fill_out( + self.guild, + custom_emoji, + [ + ("EMOJI", str(emoji_id), PARSE_MODE_NONE), + ("EMOJI_COUNT", str(self.reaction.count), PARSE_MODE_NONE), + ("EMOJI_FILE", emoji_type, PARSE_MODE_NONE), + ], + ) async def create_standard_emoji(self): react_emoji = await convert_emoji(self.reaction.emoji) - self.reaction = await fill_out(self.guild, emoji, [ - ("EMOJI", str(react_emoji), PARSE_MODE_NONE), - ("EMOJI_COUNT", str(self.reaction.count), PARSE_MODE_NONE) - ]) + self.reaction = await fill_out( + self.guild, + emoji, + [("EMOJI", str(react_emoji), PARSE_MODE_NONE), ("EMOJI_COUNT", str(self.reaction.count), PARSE_MODE_NONE)], + ) diff --git a/chat_exporter/construct/attachment_handler.py b/chat_exporter/construct/attachment_handler.py index 512c72f..b86bbdd 100644 --- a/chat_exporter/construct/attachment_handler.py +++ b/chat_exporter/construct/attachment_handler.py @@ -1,106 +1,108 @@ +import asyncio import datetime import io +import os import pathlib -from typing import Union import urllib.parse - -import asyncio -import os +from typing import Union import aiohttp + from chat_exporter.ext.discord_import import discord class AttachmentHandler: - """Handle the saving of attachments (images, videos, audio, etc.) + """Handle the saving of attachments (images, videos, audio, etc.) + + Subclass this to implement your own asset handler.""" - Subclass this to implement your own asset handler.""" + async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment: + """Implement this to process the asset and return a url to the stored attachment. + :param attachment: discord.Attachment + :return: str + """ + raise NotImplementedError - async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment: - """Implement this to process the asset and return a url to the stored attachment. 
- :param attachment: discord.Attachment - :return: str - """ - raise NotImplementedError class AttachmentToLocalFileHostHandler(AttachmentHandler): - """Save the assets to a local file host and embed the assets in the transcript from there.""" - - def __init__(self, base_path: Union[str, pathlib.Path], url_base: str): - if isinstance(base_path, str): - base_path = pathlib.Path(base_path) - self.base_path = base_path - self.url_base = url_base - - async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment: - """Implement this to process the asset and return a url to the stored attachment. - :param attachment: discord.Attachment - :return: str - """ - file_name = urllib.parse.quote_plus(f"{datetime.datetime.utcnow().timestamp()}_{attachment.filename}") - asset_path = self.base_path / file_name - await attachment.save(asset_path) - file_url = f"{self.url_base}/{file_name}" - attachment.url = file_url - attachment.proxy_url = file_url - return attachment + """Save the assets to a local file host and embed the assets in the transcript from there.""" + + def __init__(self, base_path: Union[str, pathlib.Path], url_base: str): + if isinstance(base_path, str): + base_path = pathlib.Path(base_path) + self.base_path = base_path + self.url_base = url_base + + async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment: + """Implement this to process the asset and return a url to the stored attachment. 
+ :param attachment: discord.Attachment + :return: str + """ + file_name = urllib.parse.quote_plus(f"{datetime.datetime.utcnow().timestamp()}_{attachment.filename}") + asset_path = self.base_path / file_name + await attachment.save(asset_path) + file_url = f"{self.url_base}/{file_name}" + attachment.url = file_url + attachment.proxy_url = file_url + return attachment class AttachmentToDiscordChannelHandler(AttachmentHandler): - """Save the attachment to a discord channel and embed the assets in the transcript from there.""" - - def __init__(self, channel: discord.TextChannel): - self.channel = channel - - async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment: - """Implement this to process the asset and return a url to the stored attachment. - :param attachment: discord.Attachment - :return: str - """ - try: - async with aiohttp.ClientSession() as session: - async with session.get(attachment.url) as res: - if res.status != 200: - res.raise_for_status() - data = io.BytesIO(await res.read()) - data.seek(0) - attach = discord.File(data, attachment.filename) - msg: discord.Message = await self.channel.send(file=attach) - return msg.attachments[0] - except discord.errors.HTTPException as e: - # discords http errors, including missing permissions - raise e - + """Save the attachment to a discord channel and embed the assets in the transcript from there.""" + + def __init__(self, channel: discord.TextChannel): + self.channel = channel + + async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment: + """Implement this to process the asset and return a url to the stored attachment. 
+ :param attachment: discord.Attachment + :return: str + """ + try: + async with aiohttp.ClientSession() as session: + async with session.get(attachment.url) as res: + if res.status != 200: + res.raise_for_status() + data = io.BytesIO(await res.read()) + data.seek(0) + attach = discord.File(data, attachment.filename) + msg: discord.Message = await self.channel.send(file=attach) + return msg.attachments[0] + except discord.errors.HTTPException as e: + # discords http errors, including missing permissions + raise e + + class AttachmentToWebhookHandler(AttachmentHandler): - """Save the attachment to a discord channel using webhook and embed the assets in the transcript from there.""" - - def __init__(self, webhook_link: str) -> None: - self.webhook_link = webhook_link - self.size_limit = 8 * 1024 * 1024 # 8 MB = 8 * 1024 KB * 1024 B - self.placeholder_path = os.path.join(os.path.dirname(__file__), "too_large.png") - - async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment: - """Implement this to process the asset and return a url to the stored attachment. 
- :param attachment: discord.Attachment - :return: str""" - try: - if attachment.size > self.size_limit: - file = discord.File(self.placeholder_path, filename="too_large.png") - else: - file = await attachment.to_file() - - async with aiohttp.ClientSession() as session: - webhook = discord.Webhook.from_url(self.webhook_link, session=session) - for i in range(3): - try: - message = await webhook.send(file=file, wait=True) - break - except aiohttp.ClientConnectionError: - print(f"Retry {i+1}/3 | Error - Webhook connection failed.") - await asyncio.sleep(3) # to prevent frequent retries on connection error - - except discord.errors.HTTPException as e: - # discords http errors, including missing permissions - raise e - else: - return message.attachments[0] \ No newline at end of file + """Save the attachment to a discord channel using webhook and embed the assets in the transcript from there.""" + + def __init__(self, webhook_link: str) -> None: + self.webhook_link = webhook_link + self.size_limit = 8 * 1024 * 1024 # 8 MB = 8 * 1024 KB * 1024 B + self.placeholder_path = os.path.join(os.path.dirname(__file__), "too_large.png") + + async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment: + """Implement this to process the asset and return a url to the stored attachment. 
+ :param attachment: discord.Attachment + :return: str""" + try: + if attachment.size > self.size_limit: + file = discord.File(self.placeholder_path, filename="too_large.png") + else: + file = await attachment.to_file() + + async with aiohttp.ClientSession() as session: + webhook = discord.Webhook.from_url(self.webhook_link, session=session) + for i in range(3): + try: + message = await webhook.send(file=file, wait=True) + break + except aiohttp.ClientConnectionError: + print(f"Retry {i + 1}/3 | Error - Webhook connection failed.") + await asyncio.sleep(3) # to prevent frequent retries on connection error + + except discord.errors.HTTPException as e: + # discords http errors, including missing permissions + raise e + else: + return message.attachments[0] diff --git a/chat_exporter/construct/message.py b/chat_exporter/construct/message.py index 8be1e23..ebfbb66 100644 --- a/chat_exporter/construct/message.py +++ b/chat_exporter/construct/message.py @@ -1,39 +1,35 @@ import html -import io -import traceback -from typing import List, Optional, Union +from datetime import timedelta +from typing import List, Optional -import aiohttp from pytz import timezone -from datetime import timedelta +from chat_exporter.construct.assets import Attachment, Component, Embed, Reaction from chat_exporter.construct.attachment_handler import AttachmentHandler +from chat_exporter.ext.cache import cache from chat_exporter.ext.discord_import import discord - -from chat_exporter.construct.assets import Attachment, Component, Embed, Reaction from chat_exporter.ext.discord_utils import DiscordUtils from chat_exporter.ext.discriminator import discriminator -from chat_exporter.ext.cache import cache from chat_exporter.ext.html_generator import ( - fill_out, + PARSE_MODE_MARKDOWN, + PARSE_MODE_NONE, + PARSE_MODE_REFERENCE, bot_tag, bot_tag_verified, + end_message, + fill_out, + img_attachment, message_body, - message_pin, - message_thread, message_content, + message_interaction, + message_pin, 
message_reference, - message_reference_unknown, message_reference_forwarded, - message_interaction, - img_attachment, - start_message, - end_message, - PARSE_MODE_NONE, - PARSE_MODE_MARKDOWN, - PARSE_MODE_REFERENCE, - message_thread_remove, + message_reference_unknown, + message_thread, message_thread_add, + message_thread_remove, + start_message, ) @@ -70,7 +66,7 @@ def __init__( guild: discord.Guild, meta_data: dict, message_dict: dict, - attachment_handler: Optional[AttachmentHandler] + attachment_handler: Optional[AttachmentHandler], ): self.message = message self.previous_message = previous_message @@ -105,23 +101,14 @@ def _collect_attachment_urls(attachment): @staticmethod def _embed_has_non_image_content(embed) -> bool: - if getattr(embed, "title", None): - return True - if getattr(embed, "description", None): - return True - if getattr(embed, "fields", None): - if len(embed.fields) > 0: - return True - author = getattr(embed, "author", None) - if author and getattr(author, "name", None): - return True - footer = getattr(embed, "footer", None) - if footer and getattr(footer, "text", None): - return True - thumbnail = getattr(embed, "thumbnail", None) - if thumbnail and getattr(thumbnail, "url", None): - return True - return False + return bool( + getattr(embed, "title", None) + or getattr(embed, "description", None) + or (getattr(embed, "fields", None) and len(embed.fields) > 0) + or (getattr(embed, "author", None) and getattr(embed.author, "name", None)) + or (getattr(embed, "footer", None) and getattr(embed.footer, "text", None)) + or (getattr(embed, "thumbnail", None) and getattr(embed.thumbnail, "url", None)) + ) def _is_duplicate_image_embed(self, embed, attachment_urls) -> bool: if not attachment_urls: @@ -186,7 +173,8 @@ async def build_meta_data(self): user_created_at = self.message.author.created_at user_bot = _gather_user_bot(self.message.author) user_avatar = ( - self.message.author.display_avatar if self.message.author.display_avatar + 
self.message.author.display_avatar + if self.message.author.display_avatar else DiscordUtils.default_avatar ) user_joined_at = self.message.author.joined_at if hasattr(self.message.author, "joined_at") else None @@ -196,7 +184,13 @@ async def build_meta_data(self): else "" ) self.meta_data[user_id] = [ - user_name_discriminator, user_created_at, user_bot, user_avatar, 1, user_joined_at, user_display_name + user_name_discriminator, + user_created_at, + user_bot, + user_avatar, + 1, + user_joined_at, + user_display_name, ] async def build_content(self): @@ -219,10 +213,14 @@ async def build_content(self): if self.forwarded: combined = f'
{combined}
' - self.message.content = await fill_out(self.guild, message_content, [ - ("MESSAGE_CONTENT", combined, PARSE_MODE_MARKDOWN), - ("EDIT", self.message_edited_at, PARSE_MODE_NONE), - ]) + self.message.content = await fill_out( + self.guild, + message_content, + [ + ("MESSAGE_CONTENT", combined, PARSE_MODE_MARKDOWN), + ("EDIT", self.message_edited_at, PARSE_MODE_NONE), + ], + ) async def build_reference(self): if not self.message.reference: @@ -248,8 +246,9 @@ async def build_reference(self): icon = "" dummy = "" + def get_interaction_status(interaction_message): - if hasattr(interaction_message, 'interaction_metadata'): + if hasattr(interaction_message, "interaction_metadata"): return interaction_message.interaction_metadata return interaction_message.interaction @@ -270,21 +269,25 @@ def get_interaction_status(interaction_message): message_edited_at = _set_edit_at(message_edited_at) avatar_url = message.author.display_avatar if message.author.display_avatar else DiscordUtils.default_avatar - self.message.reference = await fill_out(self.guild, message_reference, [ - ("AVATAR_URL", str(avatar_url), PARSE_MODE_NONE), - ("BOT_TAG", is_bot, PARSE_MODE_NONE), - ("NAME_TAG", await discriminator(message.author.name, message.author.discriminator), PARSE_MODE_NONE), - ("NAME", str(html.escape(message.author.display_name))), - ("USER_COLOUR", user_colour, PARSE_MODE_NONE), - ("CONTENT", message.content.replace("\n", "").replace("
", ""), PARSE_MODE_REFERENCE), - ("EDIT", message_edited_at, PARSE_MODE_NONE), - ("ICON", icon, PARSE_MODE_NONE), - ("USER_ID", str(message.author.id), PARSE_MODE_NONE), - ("MESSAGE_ID", str(self.message.reference.message_id), PARSE_MODE_NONE), - ]) + self.message.reference = await fill_out( + self.guild, + message_reference, + [ + ("AVATAR_URL", str(avatar_url), PARSE_MODE_NONE), + ("BOT_TAG", is_bot, PARSE_MODE_NONE), + ("NAME_TAG", await discriminator(message.author.name, message.author.discriminator), PARSE_MODE_NONE), + ("NAME", str(html.escape(message.author.display_name))), + ("USER_COLOUR", user_colour, PARSE_MODE_NONE), + ("CONTENT", message.content.replace("\n", "").replace("
", ""), PARSE_MODE_REFERENCE), + ("EDIT", message_edited_at, PARSE_MODE_NONE), + ("ICON", icon, PARSE_MODE_NONE), + ("USER_ID", str(message.author.id), PARSE_MODE_NONE), + ("MESSAGE_ID", str(self.message.reference.message_id), PARSE_MODE_NONE), + ], + ) async def build_interaction(self): - if hasattr(self.message, 'interaction_metadata'): + if hasattr(self.message, "interaction_metadata"): if not self.message.interaction_metadata: self.interaction = "" return @@ -303,22 +306,26 @@ async def build_interaction(self): user_colour = await self._gather_user_colour(user) avatar_url = user.display_avatar if user.display_avatar else DiscordUtils.default_avatar - self.interaction = await fill_out(self.guild, message_interaction, [ - ("AVATAR_URL", str(avatar_url), PARSE_MODE_NONE), - ("BOT_TAG", is_bot, PARSE_MODE_NONE), - ("NAME_TAG", await discriminator(user.name, user.discriminator), PARSE_MODE_NONE), - ("NAME", str(html.escape(user.display_name))), - ("COMMAND", str(command), PARSE_MODE_NONE), - ("USER_COLOUR", user_colour, PARSE_MODE_NONE), - ("FILLER", "used ", PARSE_MODE_NONE), - ("USER_ID", str(user.id), PARSE_MODE_NONE), - ("INTERACTION_ID", str(interaction_id), PARSE_MODE_NONE), - ]) + self.interaction = await fill_out( + self.guild, + message_interaction, + [ + ("AVATAR_URL", str(avatar_url), PARSE_MODE_NONE), + ("BOT_TAG", is_bot, PARSE_MODE_NONE), + ("NAME_TAG", await discriminator(user.name, user.discriminator), PARSE_MODE_NONE), + ("NAME", str(html.escape(user.display_name))), + ("COMMAND", str(command), PARSE_MODE_NONE), + ("USER_COLOUR", user_colour, PARSE_MODE_NONE), + ("FILLER", "used ", PARSE_MODE_NONE), + ("USER_ID", str(user.id), PARSE_MODE_NONE), + ("INTERACTION_ID", str(interaction_id), PARSE_MODE_NONE), + ], + ) async def build_sticker(self): sticker = None sticker_image_url = None - + if self.message.stickers and hasattr(self.message.stickers[0], "url"): sticker_image_url = self.message.stickers[0].url if not sticker_image_url: @@ -330,12 +337,11 
@@ async def build_sticker(self): if not sticker_image_url: return - if sticker_image_url.endswith(".json"): try: sticker = await self.message.stickers[0].fetch() - except: + except Exception: for snapshot in self.get_message_snapshots(): if hasattr(snapshot, "stickers") and snapshot.stickers and hasattr(snapshot.stickers[0], "url"): sticker = await snapshot.stickers[0].fetch() @@ -344,10 +350,14 @@ async def build_sticker(self): f"https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/stickers/{sticker.pack_id}/{sticker.id}.gif" ) - self.message.content = await fill_out(self.guild, img_attachment, [ - ("ATTACH_URL", str(sticker_image_url), PARSE_MODE_NONE), - ("ATTACH_URL_THUMB", str(sticker_image_url), PARSE_MODE_NONE) - ]) + self.message.content = await fill_out( + self.guild, + img_attachment, + [ + ("ATTACH_URL", str(sticker_image_url), PARSE_MODE_NONE), + ("ATTACH_URL_THUMB", str(sticker_image_url), PARSE_MODE_NONE), + ], + ) async def build_assets(self): processed_attachments = [] @@ -372,13 +382,13 @@ async def build_assets(self): for a in processed_attachments: self.attachments += await Attachment(a, self.guild).flow() - + for snapshot in self.get_message_snapshots(): if hasattr(snapshot, "attachments"): for sa in snapshot.attachments: if self.attachment_handler: sa = await self.attachment_handler.process_asset(sa) - self.attachments += await Attachment(sa,self.guild).flow() + self.attachments += await Attachment(sa, self.guild).flow() self.message.reference = message_reference_forwarded for c in self.message.components: @@ -387,7 +397,7 @@ async def build_assets(self): for snapshot in self.get_message_snapshots(): if hasattr(snapshot, "components"): for ac in snapshot.components: - self.components += await Component(ac,self.guild).flow() + self.components += await Component(ac, self.guild).flow() self.message.reference = message_reference_forwarded for r in self.message.reactions: @@ -402,25 +412,32 @@ async def build_message_template(self): if started: 
return self.message_html - self.message_html += await fill_out(self.guild, message_body, [ - ("MESSAGE_ID", str(self.message.id)), - ("MESSAGE_CONTENT", self.message.content, PARSE_MODE_NONE), - ("EMBEDS", self.embeds, PARSE_MODE_NONE), - ("ATTACHMENTS", self.attachments, PARSE_MODE_NONE), - ("COMPONENTS", self.components, PARSE_MODE_NONE), - ("EMOJI", self.reactions, PARSE_MODE_NONE), - ("TIMESTAMP", self.message_created_at, PARSE_MODE_NONE), - ("TIME", self.message_created_at.split(maxsplit=4)[4], PARSE_MODE_NONE), - ]) + self.message_html += await fill_out( + self.guild, + message_body, + [ + ("MESSAGE_ID", str(self.message.id)), + ("MESSAGE_CONTENT", self.message.content, PARSE_MODE_NONE), + ("EMBEDS", self.embeds, PARSE_MODE_NONE), + ("ATTACHMENTS", self.attachments, PARSE_MODE_NONE), + ("COMPONENTS", self.components, PARSE_MODE_NONE), + ("EMOJI", self.reactions, PARSE_MODE_NONE), + ("TIMESTAMP", self.message_created_at, PARSE_MODE_NONE), + ("TIME", self.message_created_at.split(maxsplit=4)[4], PARSE_MODE_NONE), + ], + ) return self.message_html def _generate_message_divider_check(self): return bool( - self.previous_message is None or self.message.reference != "" or - self.previous_message.type is not discord.MessageType.default or self.interaction != "" or - self.previous_message.author.id != self.message.author.id or self.message.webhook_id is not None or - self.message.created_at > (self.previous_message.created_at + timedelta(minutes=4)) + self.previous_message is None + or self.message.reference != "" + or self.previous_message.type is not discord.MessageType.default + or self.interaction != "" + or self.previous_message.author.id != self.message.author.id + or self.message.webhook_id is not None + or self.message.created_at > (self.previous_message.created_at + timedelta(minutes=4)) ) async def generate_message_divider(self, channel_audit=False): @@ -434,7 +451,9 @@ async def generate_message_divider(self, channel_audit=False): followup_symbol = "" 
is_bot = _gather_user_bot(self.message.author) - avatar_url = self.message.author.display_avatar if self.message.author.display_avatar else DiscordUtils.default_avatar + avatar_url = self.message.author.display_avatar + if not avatar_url: + avatar_url = DiscordUtils.default_avatar if self.message.reference != "" or self.interaction: followup_symbol = "
" @@ -448,81 +467,129 @@ async def generate_message_divider(self, channel_audit=False): else: default_timestamp = time.astimezone(timezone(self.pytz_timezone)).strftime("%d-%m-%Y %I:%M %p") - self.message_html += await fill_out(self.guild, start_message, [ - ("REFERENCE_SYMBOL", followup_symbol, PARSE_MODE_NONE), - ("REFERENCE", self.message.reference if self.message.reference else self.interaction, - PARSE_MODE_NONE), - ("AVATAR_URL", str(avatar_url), PARSE_MODE_NONE), - ("NAME_TAG", await discriminator(self.message.author.name, self.message.author.discriminator), PARSE_MODE_NONE), - ("USER_ID", str(self.message.author.id)), - ("USER_COLOUR", await self._gather_user_colour(self.message.author)), - ("USER_ICON", await self._gather_user_icon(self.message.author), PARSE_MODE_NONE), - ("NAME", str(html.escape(self.message.author.display_name))), - ("BOT_TAG", str(is_bot), PARSE_MODE_NONE), - ("TIMESTAMP", str(self.message_created_at)), - ("DEFAULT_TIMESTAMP", str(default_timestamp), PARSE_MODE_NONE), - ("MESSAGE_ID", str(self.message.id)), - ("MESSAGE_CONTENT", self.message.content, PARSE_MODE_NONE), - ("EMBEDS", self.embeds, PARSE_MODE_NONE), - ("ATTACHMENTS", self.attachments, PARSE_MODE_NONE), - ("COMPONENTS", self.components, PARSE_MODE_NONE), - ("EMOJI", self.reactions, PARSE_MODE_NONE) - ]) + self.message_html += await fill_out( + self.guild, + start_message, + [ + ("REFERENCE_SYMBOL", followup_symbol, PARSE_MODE_NONE), + ( + "REFERENCE", + self.message.reference if self.message.reference else self.interaction, + PARSE_MODE_NONE, + ), + ("AVATAR_URL", str(avatar_url), PARSE_MODE_NONE), + ( + "NAME_TAG", + await discriminator(self.message.author.name, self.message.author.discriminator), + PARSE_MODE_NONE, + ), + ("USER_ID", str(self.message.author.id)), + ("USER_COLOUR", await self._gather_user_colour(self.message.author)), + ("USER_ICON", await self._gather_user_icon(self.message.author), PARSE_MODE_NONE), + ("NAME", 
str(html.escape(self.message.author.display_name))), + ("BOT_TAG", str(is_bot), PARSE_MODE_NONE), + ("TIMESTAMP", str(self.message_created_at)), + ("DEFAULT_TIMESTAMP", str(default_timestamp), PARSE_MODE_NONE), + ("MESSAGE_ID", str(self.message.id)), + ("MESSAGE_CONTENT", self.message.content, PARSE_MODE_NONE), + ("EMBEDS", self.embeds, PARSE_MODE_NONE), + ("ATTACHMENTS", self.attachments, PARSE_MODE_NONE), + ("COMPONENTS", self.components, PARSE_MODE_NONE), + ("EMOJI", self.reactions, PARSE_MODE_NONE), + ], + ) return True async def build_pin_template(self): - self.message_html += await fill_out(self.guild, message_pin, [ - ("PIN_URL", DiscordUtils.pinned_message_icon, PARSE_MODE_NONE), - ("USER_COLOUR", await self._gather_user_colour(self.message.author)), - ("NAME", str(html.escape(self.message.author.display_name))), - ("NAME_TAG", await discriminator(self.message.author.name, self.message.author.discriminator), PARSE_MODE_NONE), - ("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE), - ("REF_MESSAGE_ID", str(self.message.reference.message_id) if self.message.reference else "", PARSE_MODE_NONE) - ]) + self.message_html += await fill_out( + self.guild, + message_pin, + [ + ("PIN_URL", DiscordUtils.pinned_message_icon, PARSE_MODE_NONE), + ("USER_COLOUR", await self._gather_user_colour(self.message.author)), + ("NAME", str(html.escape(self.message.author.display_name))), + ( + "NAME_TAG", + await discriminator(self.message.author.name, self.message.author.discriminator), + PARSE_MODE_NONE, + ), + ("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE), + ( + "REF_MESSAGE_ID", + str(self.message.reference.message_id) if self.message.reference else "", + PARSE_MODE_NONE, + ), + ], + ) async def build_thread_template(self): - self.message_html += await fill_out(self.guild, message_thread, [ - ("THREAD_URL", DiscordUtils.thread_channel_icon, - PARSE_MODE_NONE), - ("THREAD_NAME", self.message.content, PARSE_MODE_NONE), - ("USER_COLOUR", await 
self._gather_user_colour(self.message.author)), - ("NAME", str(html.escape(self.message.author.display_name))), - ("NAME_TAG", await discriminator(self.message.author.name, self.message.author.discriminator), PARSE_MODE_NONE), - ("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE), - ]) + self.message_html += await fill_out( + self.guild, + message_thread, + [ + ("THREAD_URL", DiscordUtils.thread_channel_icon, PARSE_MODE_NONE), + ("THREAD_NAME", self.message.content, PARSE_MODE_NONE), + ("USER_COLOUR", await self._gather_user_colour(self.message.author)), + ("NAME", str(html.escape(self.message.author.display_name))), + ( + "NAME_TAG", + await discriminator(self.message.author.name, self.message.author.discriminator), + PARSE_MODE_NONE, + ), + ("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE), + ], + ) async def build_remove(self): removed_member: discord.Member = self.message.mentions[0] - self.message_html += await fill_out(self.guild, message_thread_remove, [ - ("THREAD_URL", DiscordUtils.thread_remove_recipient, - PARSE_MODE_NONE), - ("USER_COLOUR", await self._gather_user_colour(self.message.author)), - ("NAME", str(html.escape(self.message.author.display_name))), - ("NAME_TAG", await discriminator(self.message.author.name, self.message.author.discriminator), - PARSE_MODE_NONE), - ("RECIPIENT_USER_COLOUR", await self._gather_user_colour(removed_member)), - ("RECIPIENT_NAME", str(html.escape(removed_member.display_name))), - ("RECIPIENT_NAME_TAG", await discriminator(removed_member.name, removed_member.discriminator), - PARSE_MODE_NONE), - ("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE), - ]) + self.message_html += await fill_out( + self.guild, + message_thread_remove, + [ + ("THREAD_URL", DiscordUtils.thread_remove_recipient, PARSE_MODE_NONE), + ("USER_COLOUR", await self._gather_user_colour(self.message.author)), + ("NAME", str(html.escape(self.message.author.display_name))), + ( + "NAME_TAG", + await discriminator(self.message.author.name, 
self.message.author.discriminator), + PARSE_MODE_NONE, + ), + ("RECIPIENT_USER_COLOUR", await self._gather_user_colour(removed_member)), + ("RECIPIENT_NAME", str(html.escape(removed_member.display_name))), + ( + "RECIPIENT_NAME_TAG", + await discriminator(removed_member.name, removed_member.discriminator), + PARSE_MODE_NONE, + ), + ("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE), + ], + ) async def build_add(self): removed_member: discord.Member = self.message.mentions[0] - self.message_html += await fill_out(self.guild, message_thread_add, [ - ("THREAD_URL", DiscordUtils.thread_add_recipient, - PARSE_MODE_NONE), - ("USER_COLOUR", await self._gather_user_colour(self.message.author)), - ("NAME", str(html.escape(self.message.author.display_name))), - ("NAME_TAG", await discriminator(self.message.author.name, self.message.author.discriminator), - PARSE_MODE_NONE), - ("RECIPIENT_USER_COLOUR", await self._gather_user_colour(removed_member)), - ("RECIPIENT_NAME", str(html.escape(removed_member.display_name))), - ("RECIPIENT_NAME_TAG", await discriminator(removed_member.name, removed_member.discriminator), - PARSE_MODE_NONE), - ("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE), - ]) + self.message_html += await fill_out( + self.guild, + message_thread_add, + [ + ("THREAD_URL", DiscordUtils.thread_add_recipient, PARSE_MODE_NONE), + ("USER_COLOUR", await self._gather_user_colour(self.message.author)), + ("NAME", str(html.escape(self.message.author.display_name))), + ( + "NAME_TAG", + await discriminator(self.message.author.name, self.message.author.discriminator), + PARSE_MODE_NONE, + ), + ("RECIPIENT_USER_COLOUR", await self._gather_user_colour(removed_member)), + ("RECIPIENT_NAME", str(html.escape(removed_member.display_name))), + ( + "RECIPIENT_NAME_TAG", + await discriminator(removed_member.name, removed_member.discriminator), + PARSE_MODE_NONE, + ), + ("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE), + ], + ) @cache() async def _gather_member(self, 
author: discord.Member): @@ -576,7 +643,7 @@ async def gather_messages( military_time, attachment_handler: Optional[AttachmentHandler], ) -> (str, dict): - message_html: str = "" + message_html_chunks: List[str] = [] meta_data: dict = {} previous_message: Optional[discord.Message] = None @@ -602,10 +669,10 @@ async def gather_messages( meta_data, message_dict, attachment_handler, - ).construct_message() + ).construct_message() - message_html += content_html + message_html_chunks.append(content_html) previous_message = message - message_html += "
" - return message_html, meta_data \ No newline at end of file + message_html_chunks.append("
") + return "".join(message_html_chunks), meta_data diff --git a/chat_exporter/construct/transcript.py b/chat_exporter/construct/transcript.py index 55f1fe9..334c7c9 100644 --- a/chat_exporter/construct/transcript.py +++ b/chat_exporter/construct/transcript.py @@ -1,24 +1,28 @@ import html +import re import traceback from datetime import datetime - -import re from typing import List, Optional import pytz +from chat_exporter.construct.assets.component import Component from chat_exporter.construct.attachment_handler import AttachmentHandler -from chat_exporter.ext.discord_import import discord - from chat_exporter.construct.message import gather_messages -from chat_exporter.construct.assets.component import Component - from chat_exporter.ext.cache import clear_cache -from chat_exporter.parse.mention import pass_bot +from chat_exporter.ext.discord_import import discord from chat_exporter.ext.discord_utils import DiscordUtils from chat_exporter.ext.html_generator import ( - fill_out, total, channel_topic, meta_data_temp, fancy_time, channel_subject, PARSE_MODE_NONE, PARSE_MODE_HTML_SAFE + PARSE_MODE_HTML_SAFE, + PARSE_MODE_NONE, + channel_subject, + channel_topic, + fancy_time, + fill_out, + meta_data_temp, + total, ) +from chat_exporter.parse.markdown import pass_bot class TranscriptDAO: @@ -59,11 +63,7 @@ def __init__( async def build_transcript(self): message_html, meta_data = await gather_messages( - self.messages, - self.channel.guild, - self.pytz_timezone, - self.military_time, - self.attachment_handler + self.messages, self.channel.guild, self.pytz_timezone, self.military_time, self.attachment_handler ) await self.export_transcript(message_html, meta_data) clear_cache() @@ -71,9 +71,11 @@ async def build_transcript(self): return self async def export_transcript(self, message_html: str, meta_data: str): - guild_icon = self.channel.guild.icon if ( - self.channel.guild.icon and len(self.channel.guild.icon) > 2 - ) else DiscordUtils.default_avatar + guild_icon = ( + 
self.channel.guild.icon + if (self.channel.guild.icon and len(self.channel.guild.icon) > 2) + else DiscordUtils.default_avatar + ) guild_name = html.escape(self.channel.guild.name) @@ -83,32 +85,41 @@ async def export_transcript(self, message_html: str, meta_data: str): else: time_now = datetime.now(timezone).strftime("%e %B %Y at %I:%M:%S %p (%Z)") - meta_data_html: str = "" + meta_data_html_chunks: List[str] = [] for data in meta_data: creation_time = meta_data[int(data)][1].astimezone(timezone).strftime("%b %d, %Y") joined_time = ( meta_data[int(data)][5].astimezone(timezone).strftime("%b %d, %Y") - if meta_data[int(data)][5] else "Unknown" + if meta_data[int(data)][5] + else "Unknown" ) - pattern = r'^#\d{4}' + pattern = r"^#\d{4}" discrim = str(meta_data[int(data)][0][-5:]) user = str(meta_data[int(data)][0]) - meta_data_html += await fill_out(self.channel.guild, meta_data_temp, [ - ("USER_ID", str(data), PARSE_MODE_NONE), - ("USERNAME", user[:-5] if re.match(pattern, discrim) else user, PARSE_MODE_NONE), - ("DISCRIMINATOR", discrim if re.match(pattern, discrim) else ""), - ("BOT", str(meta_data[int(data)][2]), PARSE_MODE_NONE), - ("CREATED_AT", str(creation_time), PARSE_MODE_NONE), - ("JOINED_AT", str(joined_time), PARSE_MODE_NONE), - ("GUILD_ICON", str(guild_icon), PARSE_MODE_NONE), - ("DISCORD_ICON", str(DiscordUtils.logo), PARSE_MODE_NONE), - ("MEMBER_ID", str(data), PARSE_MODE_NONE), - ("USER_AVATAR", str(meta_data[int(data)][3]), PARSE_MODE_NONE), - ("DISPLAY", str(meta_data[int(data)][6]), PARSE_MODE_NONE), - ("MESSAGE_COUNT", str(meta_data[int(data)][4])) - ]) + meta_data_html_chunks.append( + await fill_out( + self.channel.guild, + meta_data_temp, + [ + ("USER_ID", str(data), PARSE_MODE_NONE), + ("USERNAME", user[:-5] if re.match(pattern, discrim) else user, PARSE_MODE_NONE), + ("DISCRIMINATOR", discrim if re.match(pattern, discrim) else ""), + ("BOT", str(meta_data[int(data)][2]), PARSE_MODE_NONE), + ("CREATED_AT", str(creation_time), 
PARSE_MODE_NONE), + ("JOINED_AT", str(joined_time), PARSE_MODE_NONE), + ("GUILD_ICON", str(guild_icon), PARSE_MODE_NONE), + ("DISCORD_ICON", str(DiscordUtils.logo), PARSE_MODE_NONE), + ("MEMBER_ID", str(data), PARSE_MODE_NONE), + ("USER_AVATAR", str(meta_data[int(data)][3]), PARSE_MODE_NONE), + ("DISPLAY", str(meta_data[int(data)][6]), PARSE_MODE_NONE), + ("MESSAGE_COUNT", str(meta_data[int(data)][4]), PARSE_MODE_NONE), + ], + ) + ) + + meta_data_html = "".join(meta_data_html_chunks) if self.military_time: channel_creation_time = self.channel.created_at.astimezone(timezone).strftime("%b %d, %Y (%H:%M:%S)") @@ -121,25 +132,29 @@ async def export_transcript(self, message_html: str, meta_data: str): channel_topic_html = "" if raw_channel_topic: - channel_topic_html = await fill_out(self.channel.guild, channel_topic, [ - ("CHANNEL_TOPIC", html.escape(raw_channel_topic)) - ]) + channel_topic_html = await fill_out( + self.channel.guild, channel_topic, [("CHANNEL_TOPIC", html.escape(raw_channel_topic))] + ) limit = "start" if self.limit: limit = f"latest {self.limit} messages" - subject = await fill_out(self.channel.guild, channel_subject, [ - ("LIMIT", limit, PARSE_MODE_NONE), - ("CHANNEL_NAME", self.channel.name), - ("RAW_CHANNEL_TOPIC", str(raw_channel_topic)) - ]) + subject = await fill_out( + self.channel.guild, + channel_subject, + [ + ("LIMIT", limit, PARSE_MODE_NONE), + ("CHANNEL_NAME", self.channel.name), + ("RAW_CHANNEL_TOPIC", str(raw_channel_topic)), + ], + ) sd = ( - '
' - ' DONATE' - '
' - ) if self.support_dev else "" + ('') + if self.support_dev + else "" + ) _fancy_time = "" @@ -149,40 +164,48 @@ async def export_transcript(self, message_html: str, meta_data: str): else: time_format = "hh:mm A" - _fancy_time = await fill_out(self.channel.guild, fancy_time, [ - ("TIME_FORMAT", time_format, PARSE_MODE_NONE), - ("TIMEZONE", str(self.pytz_timezone), PARSE_MODE_NONE) - ]) - - self.html = await fill_out(self.channel.guild, total, [ - ("SERVER_NAME", f"{guild_name}"), - ("GUILD_ID", str(self.channel.guild.id), PARSE_MODE_NONE), - ("SERVER_AVATAR_URL", str(guild_icon), PARSE_MODE_NONE), - ("CHANNEL_NAME", f"{self.channel.name}"), - ("MESSAGE_COUNT", str(len(self.messages))), - ("MESSAGES", message_html, PARSE_MODE_NONE), - ("META_DATA", meta_data_html, PARSE_MODE_NONE), - ("DATE_TIME", str(time_now)), - ("SUBJECT", subject, PARSE_MODE_NONE), - ("CHANNEL_CREATED_AT", str(channel_creation_time), PARSE_MODE_NONE), - ("CHANNEL_TOPIC", str(channel_topic_html), PARSE_MODE_NONE), - ("CHANNEL_ID", str(self.channel.id), PARSE_MODE_NONE), - ("MESSAGE_PARTICIPANTS", str(len(meta_data)), PARSE_MODE_NONE), - ("FANCY_TIME", _fancy_time, PARSE_MODE_NONE), - ("SD", sd, PARSE_MODE_NONE), - ("SERVER_NAME_SAFE", f"{guild_name}", PARSE_MODE_HTML_SAFE), - ("CHANNEL_NAME_SAFE", f"{html.escape(self.channel.name)}", PARSE_MODE_HTML_SAFE), - ]) + _fancy_time = await fill_out( + self.channel.guild, + fancy_time, + [("TIME_FORMAT", time_format, PARSE_MODE_NONE), ("TIMEZONE", str(self.pytz_timezone), PARSE_MODE_NONE)], + ) + + self.html = await fill_out( + self.channel.guild, + total, + [ + ("SERVER_NAME", f"{guild_name}"), + ("GUILD_ID", str(self.channel.guild.id), PARSE_MODE_NONE), + ("SERVER_AVATAR_URL", str(guild_icon), PARSE_MODE_NONE), + ("CHANNEL_NAME", f"{self.channel.name}"), + ("MESSAGE_COUNT", str(len(self.messages)), PARSE_MODE_NONE), + ("MESSAGES", message_html, PARSE_MODE_NONE), + ("META_DATA", meta_data_html, PARSE_MODE_NONE), + ("DATE_TIME", str(time_now)), + 
("SUBJECT", subject, PARSE_MODE_NONE), + ("CHANNEL_CREATED_AT", str(channel_creation_time), PARSE_MODE_NONE), + ("CHANNEL_TOPIC", str(channel_topic_html), PARSE_MODE_NONE), + ("CHANNEL_ID", str(self.channel.id), PARSE_MODE_NONE), + ("MESSAGE_PARTICIPANTS", str(len(meta_data)), PARSE_MODE_NONE), + ("FANCY_TIME", _fancy_time, PARSE_MODE_NONE), + ("SD", sd, PARSE_MODE_NONE), + ("SERVER_NAME_SAFE", f"{guild_name}", PARSE_MODE_HTML_SAFE), + ("CHANNEL_NAME_SAFE", f"{html.escape(self.channel.name)}", PARSE_MODE_HTML_SAFE), + ], + ) class Transcript(TranscriptDAO): async def export(self): if not self.messages: - self.messages = [message async for message in self.channel.history( - limit=self.limit, - before=self.before, - after=self.after, - )] + self.messages = [ + message + async for message in self.channel.history( + limit=self.limit, + before=self.before, + after=self.after, + ) + ] if not self.after: self.messages.reverse() diff --git a/chat_exporter/ext/cache.py b/chat_exporter/ext/cache.py index 8889ac5..e53ac19 100644 --- a/chat_exporter/ext/cache.py +++ b/chat_exporter/ext/cache.py @@ -9,12 +9,14 @@ async def func(): value = await coro cache[key] = value return value + return func() def _wrap_new_coroutine(value): async def new_coroutine(): return value + return new_coroutine() @@ -29,16 +31,16 @@ def _true_repr(o): if o.__class__.__repr__ is object.__repr__: # this is how MessageConstruct can retain # caching across multiple instances - return f'<{o.__class__.__module__}.{o.__class__.__name__}>' + return f"<{o.__class__.__module__}.{o.__class__.__name__}>" return repr(o) - key = [f'{func.__module__}.{func.__name__}'] + key = [f"{func.__module__}.{func.__name__}"] key.extend(_true_repr(o) for o in args) for k, v in kwargs.items(): key.append(_true_repr(k)) key.append(_true_repr(v)) - return ':'.join(key) + return ":".join(key) @wraps(func) def wrapper(*args, **kwargs): @@ -54,4 +56,5 @@ def wrapper(*args, **kwargs): wrapper.cache = _internal_cache 
wrapper.clear_cache = _internal_cache.clear() return wrapper + return decorator diff --git a/chat_exporter/ext/discord_import.py b/chat_exporter/ext/discord_import.py index ef77f86..08121e5 100644 --- a/chat_exporter/ext/discord_import.py +++ b/chat_exporter/ext/discord_import.py @@ -1,4 +1,4 @@ -discord_modules = ['nextcord', 'disnake', 'discord'] +discord_modules = ["nextcord", "disnake", "discord"] for module in discord_modules: try: discord = __import__(module) diff --git a/chat_exporter/ext/discord_utils.py b/chat_exporter/ext/discord_utils.py index 8b284a2..d69237f 100644 --- a/chat_exporter/ext/discord_utils.py +++ b/chat_exporter/ext/discord_utils.py @@ -1,18 +1,22 @@ class DiscordUtils: - logo: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-logo.svg' - default_avatar: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-default.png' - pinned_message_icon: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-pinned.svg' - thread_channel_icon: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-thread.svg' - thread_remove_recipient: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-thread-remove-recipient.svg' - thread_add_recipient: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-thread-add-recipient.svg' - file_attachment_audio: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-audio.svg' - file_attachment_acrobat: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-acrobat.svg' - file_attachment_webcode: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-webcode.svg' - file_attachment_code: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-code.svg' - file_attachment_document: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-document.svg' - file_attachment_archive: str = 
'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-archive.svg' - file_attachment_unknown: str = 'https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-unknown.svg' + logo: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-logo.svg" + default_avatar: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-default.png" + pinned_message_icon: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-pinned.svg" + thread_channel_icon: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-thread.svg" + thread_remove_recipient: str = ( + "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-thread-remove-recipient.svg" + ) + thread_add_recipient: str = ( + "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-thread-add-recipient.svg" + ) + file_attachment_audio: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-audio.svg" + file_attachment_acrobat: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-acrobat.svg" + file_attachment_webcode: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-webcode.svg" + file_attachment_code: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-code.svg" + file_attachment_document: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-document.svg" + file_attachment_archive: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-archive.svg" + file_attachment_unknown: str = "https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/discord-unknown.svg" button_external_link: str = '' reference_attachment_icon: str = '' interaction_command_icon: str = '' - interaction_dropdown_icon: str = '' \ No newline at end of file + interaction_dropdown_icon: str = '' diff --git a/chat_exporter/ext/emoji_convert.py b/chat_exporter/ext/emoji_convert.py index 2903733..4bff8e1 100644 --- a/chat_exporter/ext/emoji_convert.py +++ 
b/chat_exporter/ext/emoji_convert.py @@ -30,13 +30,13 @@ # Github: https://github.com/glasnt/emojificate # ################################################################################## import unicodedata -from grapheme import graphemes -import emoji + import aiohttp +import emoji +from grapheme import graphemes from chat_exporter.ext.cache import cache - cdn_fmt = "https://cdn.jsdelivr.net/gh/jdecked/twemoji@latest/assets/72x72/{codepoint}.png" diff --git a/chat_exporter/ext/html_generator.py b/chat_exporter/ext/html_generator.py index 2c716f3..f500c8a 100644 --- a/chat_exporter/ext/html_generator.py +++ b/chat_exporter/ext/html_generator.py @@ -1,9 +1,9 @@ import html import json import os +import re -from chat_exporter.parse.mention import ParseMention -from chat_exporter.parse.markdown import ParseMarkdown +from chat_exporter.parse.markdown import ParseMarkdown, bot as mention_bot dir_path = os.path.abspath(os.path.join((os.path.dirname(os.path.realpath(__file__))), "..")) @@ -18,34 +18,41 @@ async def fill_out(guild, base, replacements): + resolved = {} for r in replacements: if len(r) == 2: # default case k, v = r - r = (k, v, PARSE_MODE_MARKDOWN) + mode = PARSE_MODE_MARKDOWN + else: + k, v, mode = r - k, v, mode = r - - if mode != PARSE_MODE_NONE: - v = await ParseMention(v, guild).flow() if mode == PARSE_MODE_MARKDOWN: - v = await ParseMarkdown(v).standard_message_flow() + v = await ParseMarkdown(v, guild, mention_bot).standard_message_flow() elif mode == PARSE_MODE_EMBED: - v = await ParseMarkdown(v).standard_embed_flow() + v = await ParseMarkdown(v, guild, mention_bot).standard_embed_flow() elif mode == PARSE_MODE_SPECIAL_EMBED: - v = await ParseMarkdown(v).special_embed_flow() + v = await ParseMarkdown(v, guild, mention_bot).special_embed_flow() elif mode == PARSE_MODE_REFERENCE: - v = await ParseMarkdown(v).message_reference_flow() + v = await ParseMarkdown(v, guild, mention_bot).message_reference_flow() elif mode == PARSE_MODE_EMOJI: - v = await 
ParseMarkdown(v).special_emoji_flow() + v = await ParseMarkdown(v, guild, mention_bot).special_emoji_flow() elif mode == PARSE_MODE_HTML_SAFE: + if mode != PARSE_MODE_NONE: + v = await ParseMarkdown(v, guild, mention_bot).standard_embed_flow() # escape html characters v = html.escape(v, quote=True) # escape characters that could be used for xss v = json.dumps(v, ensure_ascii=False)[1:-1] + elif mode != PARSE_MODE_NONE: + v = await ParseMarkdown(v, guild, mention_bot).standard_embed_flow() + + resolved[k] = str(v or "").strip() - base = base.replace("{{" + k + "}}", str(v or "").strip()) + def repl(match): + key = match.group(1) + return resolved.get(key, match.group(0)) - return base + return re.sub(r"\{\{([A-Z0-9_]+)\}\}", repl, base) def read_file(filename): diff --git a/chat_exporter/html/base.html b/chat_exporter/html/base.html index bfc06de..2c91e29 100644 --- a/chat_exporter/html/base.html +++ b/chat_exporter/html/base.html @@ -558,11 +558,11 @@ .chatlog__embed { --embed-color: rgba(74, 74, 80, 1); display: grid; - grid-template-columns: minmax(0, 1fr) auto; - column-gap: 1rem; + grid-template-columns: auto; + grid-template-rows: auto; + overflow: hidden; margin-top: 0.35em; - max-width: 520px; - padding: 2px 1rem 1rem 0.75rem; + max-width: 516px; background-color: #37373d; color: #f3f3f4; border: 1px solid #434349; @@ -570,6 +570,9 @@ border-radius: 4px; box-sizing: border-box; font-size: 16px; + padding-block: .5rem 1rem; + padding-inline: .75rem 1rem; + padding-top: .125rem; } .chatlog__embed-content { @@ -622,17 +625,15 @@ .chatlog__embed-description { margin-top: 0.5rem; font-weight: 400; - font-size: 0.95rem; - line-height: 1.375; + font-size: 0.875rem; + line-height: 1.125rem; color: inherit; } .chatlog__embed-fields { display: grid; - grid-template-columns: repeat(auto-fit, minmax(170px, 1fr)); - gap: 0.5rem; - margin-top: 0.5rem; - width: 100%; + margin-top: 8px; + grid-gap: 8px; } .chatlog__embed-fields:empty { @@ -640,19 +641,11 @@ } 
.chatlog__embed-field { - padding-top: 0; + line-height: 1.125rem; font-size: 0.875em; min-width: 0; } - .chatlog__embed-field--inline { - grid-column: span 1; - } - - .chatlog__embed-field:not(.chatlog__embed-field--inline) { - grid-column: 1 / -1; - } - .chatlog__embed-field-name { margin-bottom: 0.15em; font-weight: 600; @@ -734,8 +727,8 @@ border: 1px solid rgba(255, 255, 255, 0.08); border-radius: 6px; box-shadow: 0 1px 0 rgba(0, 0, 0, 0.15); - min-height: 38px; - padding: 0 12px; + min-height: 24px; + padding: 3px 11px 3px 11px; cursor: pointer; transition: background-color 0.12s ease, transform 0.12s ease; text-decoration: none; @@ -1653,6 +1646,21 @@ border: none; } + h1 { + font-size: 1.5rem; + margin: 16px 0 8px; + } + + h2 { + font-size: 1.25rem; + margin: 16px 0 8px; + } + + h3 { + font-size: 1.125rem; + margin: 16px 0 8px; + } + .chatlog__component-text-display h1, .chatlog__component-text-display h2, .chatlog__component-text-display h3, diff --git a/chat_exporter/html/embed/field-inline.html b/chat_exporter/html/embed/field-inline.html index 2c5aaba..c6c9293 100644 --- a/chat_exporter/html/embed/field-inline.html +++ b/chat_exporter/html/embed/field-inline.html @@ -1,4 +1,4 @@ -
+
{{FIELD_NAME}}
{{FIELD_VALUE}}
\ No newline at end of file diff --git a/chat_exporter/html/embed/field.html b/chat_exporter/html/embed/field.html index b559afb..7801e6e 100644 --- a/chat_exporter/html/embed/field.html +++ b/chat_exporter/html/embed/field.html @@ -1,4 +1,4 @@ -
+
{{FIELD_NAME}}
{{FIELD_VALUE}}
\ No newline at end of file diff --git a/chat_exporter/parse/ast.py b/chat_exporter/parse/ast.py new file mode 100644 index 0000000..981dcf2 --- /dev/null +++ b/chat_exporter/parse/ast.py @@ -0,0 +1,651 @@ +import datetime +import re +import time +from typing import List + +import pytz + + +class Node: + def render(self, guild=None, bot=None) -> str: + raise NotImplementedError() + + +class TextNode(Node): + def __init__(self, text: str): + self.text = text + + def render(self, guild=None, bot=None): + return self.text + + +class ContainerNode(Node): + def __init__(self, children: List[Node]): + self.children = children + + def render_children(self, guild=None, bot=None): + return "".join(c.render(guild, bot) for c in self.children) + + +class BoldNode(ContainerNode): + def render(self, guild=None, bot=None): + return f"{self.render_children(guild, bot)}" + + +class ItalicNode(ContainerNode): + def render(self, guild=None, bot=None): + return f"{self.render_children(guild, bot)}" + + +class UnderlineNode(ContainerNode): + def render(self, guild=None, bot=None): + return f'{self.render_children(guild, bot)}' + + +class StrikethroughNode(ContainerNode): + def render(self, guild=None, bot=None): + return f'{self.render_children(guild, bot)}' + + +class SpoilerNode(ContainerNode): + def render(self, guild=None, bot=None): + return ( + '' + f'{self.render_children(guild, bot)}' + ) + + +class InlineCodeNode(Node): + def __init__(self, code: str): + self.code = code + + def render(self, guild=None, bot=None): + return f'{self.code}' + + +class CodeBlockNode(Node): + def __init__(self, lang: str, code: str): + self.lang = lang + self.code = code + + def render(self, guild=None, bot=None): + lang_class = f"language-{self.lang}" if self.lang else "nohighlight" + return f'
{self.code}
' + + +class QuoteNode(ContainerNode): + def render(self, guild=None, bot=None): + return ( + '
' + f"{self.render_children(guild, bot)}
" + ) + + + +class HeaderNode(ContainerNode): + def __init__(self, level: int, children: List[Node]): + super().__init__(children) + self.level = level + + def render(self, guild=None, bot=None): + return f"{self.render_children(guild, bot)}" + + +class SubtextNode(ContainerNode): + def render(self, guild=None, bot=None): + return f"{self.render_children(guild, bot)}" + + +class LinkNode(ContainerNode): + def __init__(self, url: str, children: List[Node]): + super().__init__(children) + self.url = url + + def render(self, guild=None, bot=None): + return f'{self.render_children(guild, bot)}' + + +class HtmlNode(Node): + def __init__(self, raw: str): + self.raw = raw + + def render(self, guild=None, bot=None): + return self.raw + + +class ListItemNode(ContainerNode): + def __init__(self, indent_level: int, children: List[Node]): + super().__init__(children) + self.indent_level = indent_level + + def render(self, guild=None, bot=None): + return f'
  • {self.render_children(guild, bot)}
  • ' + + +class ListBlockNode(ContainerNode): + def render(self, guild=None, bot=None): + html = '
      \n' + indent_stack = [0] + + for item in self.children: + if not isinstance(item, ListItemNode): + continue + indent = item.indent_level + + if indent % 2 == 0: + while indent < indent_stack[-1]: + html += "
    \n" + indent_stack.pop() + if indent > indent_stack[-1]: + html += '
      \n' + indent_stack.append(indent) + else: + while indent + 1 < indent_stack[-1]: + html += "
    \n" + indent_stack.pop() + if indent + 1 > indent_stack[-1]: + html += '
      \n' + indent_stack.append(indent + 1) + + html += item.render(guild, bot) + "\n" + + while len(indent_stack) > 1: + html += "
    \n" + indent_stack.pop() + html += "" + return html + + +class ChannelMentionNode(Node): + def __init__(self, channel_id: int): + self.channel_id = channel_id + + def render(self, guild=None, bot=None): + channel = guild.get_channel(self.channel_id) if guild else None + if channel is None: + return "#deleted-channel" + return f'#{channel.name}' + + +class UserMentionNode(Node): + ESCAPE_LT = "______lt______" + ESCAPE_GT = "______gt______" + ESCAPE_AMP = "______amp______" + + def __init__(self, user_id: int): + self.user_id = user_id + + def render(self, guild=None, bot=None): + member = None + if guild: + member = guild.get_member(self.user_id) + if not member and bot: + member = bot.get_user(self.user_id) + + if member: + member_name = member.display_name + escaped_name = ( + member_name.replace("<", self.ESCAPE_LT) + .replace(">", self.ESCAPE_GT) + .replace("&", self.ESCAPE_AMP) + ) + return f'@{escaped_name}' + else: + return f'<@{self.user_id}>' + + +class RoleMentionNode(Node): + def __init__(self, role_id: int): + self.role_id = role_id + + def render(self, guild=None, bot=None): + role = guild.get_role(self.role_id) if guild else None + if role is None: + return "@deleted-role" + if role.color.r == 0 and role.color.g == 0 and role.color.b == 0: + colour = "#dee0fc" + else: + colour = "#%02x%02x%02x" % (role.color.r, role.color.g, role.color.b) + return f'@{role.name}' + + +class EveryoneMentionNode(Node): + def render(self, guild=None, bot=None): + return '@everyone' + + +class HereMentionNode(Node): + def render(self, guild=None, bot=None): + return '@here' + + +class SlashCommandNode(Node): + def __init__(self, name: str): + self.name = name + + def render(self, guild=None, bot=None): + return f'/{self.name}' + + +class TimeMentionNode(Node): + CYCLE_SECONDS = 12_622_780_800 + + def __init__(self, timestamp: int, format_str: str, original: str): + self.timestamp = timestamp + self.format_str = format_str + self.original = original + + def render(self, 
guild=None, bot=None): + timestamp = self.timestamp - 1 + try: + time_stamp = time.gmtime(timestamp) + datetime_stamp = datetime.datetime(2010, *time_stamp[1:6], tzinfo=pytz.utc) + ui_time = datetime_stamp.strftime(self.format_str).replace( + str(datetime_stamp.year), str(time_stamp[0]) + ) + tooltip_time = datetime_stamp.strftime("%A, %e %B %Y at %H:%M").replace( + str(datetime_stamp.year), str(time_stamp[0]) + ) + except (OSError, OverflowError, ValueError): + safe_ts = timestamp % self.CYCLE_SECONDS + years_shifted = (timestamp // self.CYCLE_SECONDS) * 400 + dt = datetime.datetime.fromtimestamp(safe_ts, pytz.utc) + final_year = dt.year + years_shifted + ui_time = dt.strftime(self.format_str).replace(str(dt.year), str(final_year)) + tooltip_time = dt.strftime("%A, %e %B %Y at %H:%M").replace( + str(dt.year), str(final_year) + ) + + original_escaped = self.original.replace("<", "<").replace(">", ">") + return ( + f'' + f"{ui_time}" + "" + ) + + +class AstParser: + def parse(self, text: str) -> List[Node]: + if not text: + return [] + nodes = self._parse_inline(str(text)) + nodes = self._merge_text_nodes(nodes) + nodes = self._merge_quote_nodes(nodes) + nodes = self._merge_list_nodes(nodes) + return nodes + + def _parse_inline(self, text: str) -> List[Node]: + nodes = [] + i = 0 + n = len(text) + + while i < n: + # Check HTML and Mentions + if text[i] == "<" or (text[i : i + 4] == "<"): + is_escaped = text[i] == "&" + + # Channel + chan_match = re.match(r"<#([0-9]+)>" if is_escaped else r"<#([0-9]+)>", text[i:]) + if chan_match: + nodes.append(ChannelMentionNode(int(chan_match.group(1)))) + i += len(chan_match.group(0)) + continue + + # Role + role_match = re.match(r"<@&([0-9]+)>" if is_escaped else r"<@&([0-9]+)>", text[i:]) + if role_match: + nodes.append(RoleMentionNode(int(role_match.group(1)))) + i += len(role_match.group(0)) + continue + + # Member + mem_match = re.match(r"<@!?([0-9]+)>" if is_escaped else r"<@!?([0-9]+)>", text[i:]) + if mem_match: + 
nodes.append(UserMentionNode(int(mem_match.group(1)))) + i += len(mem_match.group(0)) + continue + + # Slash Command + slash_match = re.match( + r"<\/([\w]+ ?[\w]*):[0-9]+>" if is_escaped else r"<\/([\w]+ ?[\w]*):[0-9]+>", + text[i:], + ) + if slash_match: + nodes.append(SlashCommandNode(slash_match.group(1))) + i += len(slash_match.group(0)) + continue + + # Time + time_patterns = [ + [r"<t:([0-9]{1,13}):t>", "%H:%M"], + [r"<t:([0-9]{1,13}):T>", "%T"], + [r"<t:([0-9]{1,13}):d>", "%d/%m/%Y"], + [r"<t:([0-9]{1,13}):D>", "%e %B %Y"], + [r"<t:([0-9]{1,13}):f>", "%e %B %Y %H:%M"], + [r"<t:([0-9]{1,13}):F>", "%A, %e %B %Y %H:%M"], + [r"<t:([0-9]{1,13}):R>", "%e %B %Y %H:%M"], + [r"<t:([0-9]{1,13})>", "%e %B %Y %H:%M"], + ] if is_escaped else [ + [r"", "%H:%M"], + [r"", "%T"], + [r"", "%d/%m/%Y"], + [r"", "%e %B %Y"], + [r"", "%e %B %Y %H:%M"], + [r"", "%A, %e %B %Y %H:%M"], + [r"", "%e %B %Y %H:%M"], + [r"", "%e %B %Y %H:%M"], + ] + time_found = False + for pattern, strf in time_patterns: + t_match = re.match(pattern, text[i:]) + if t_match: + nodes.append(TimeMentionNode(int(t_match.group(1)), strf, t_match.group(0))) + i += len(t_match.group(0)) + time_found = True + break + if time_found: + continue + + # HTML fallback + if text[i] == "<": + tag_match = re.match(r"(<[^>]+>)", text[i:]) + if tag_match: + nodes.append(HtmlNode(tag_match.group(1))) + i += len(tag_match.group(1)) + continue + + # Newline handler (crucial to restart cursor for block elements) + if text[i] == "\n": + nodes.append(TextNode("\n")) + i += 1 + continue + + # Code block ``` + if text[i : i + 3] == "```": + endtag = text.find("```", i + 3) + if endtag != -1: + inner = text[i + 3 : endtag] + lines = inner.split("\n", 1) + if len(lines) > 1 and " " not in lines[0]: + lang = lines[0] + code = lines[1] + else: + lang = "" + code = inner + if code.startswith("\n"): + code = code[1:] + if code.endswith("\n"): + code = code[:-1] + nodes.append(CodeBlockNode(lang, code)) + i = endtag + 3 + continue + + 
# Code block `` + if text[i : i + 2] == "``": + endtag = text.find("``", i + 2) + if endtag != -1: + nodes.append(InlineCodeNode(text[i + 2 : endtag])) + i = endtag + 2 + continue + + # Inline code ` + if text[i] == "`": + endtag = text.find("`", i + 1) + if endtag != -1: + nodes.append(InlineCodeNode(text[i + 1 : endtag])) + i = endtag + 1 + continue + + # Bold + if text[i : i + 2] == "**": + endtag = text.find("**", i + 2) + if endtag != -1: + nodes.append(BoldNode(self._parse_inline(text[i + 2 : endtag]))) + i = endtag + 2 + continue + + # Underline + if text[i : i + 2] == "__": + endtag = text.find("__", i + 2) + if endtag != -1: + nodes.append(UnderlineNode(self._parse_inline(text[i + 2 : endtag]))) + i = endtag + 2 + continue + + # Strikethrough + if text[i : i + 2] == "~~": + endtag = text.find("~~", i + 2) + if endtag != -1: + nodes.append(StrikethroughNode(self._parse_inline(text[i + 2 : endtag]))) + i = endtag + 2 + continue + + # Spoiler + if text[i : i + 2] == "||": + endtag = text.find("||", i + 2) + if endtag != -1: + nodes.append(SpoilerNode(self._parse_inline(text[i + 2 : endtag]))) + i = endtag + 2 + continue + + # Italic 1 + if text[i] == "*": + endtag = text.find("*", i + 1) + if endtag != -1 and text[i : i + 2] != "**": + nodes.append(ItalicNode(self._parse_inline(text[i + 1 : endtag]))) + i = endtag + 1 + continue + + # Italic 2 + if text[i] == "_": + endtag = text.find("_", i + 1) + # Ensure it's not part of __ and ideally isolated, but basic parse for discord + if endtag != -1 and text[i : i + 2] != "__": + nodes.append(ItalicNode(self._parse_inline(text[i + 1 : endtag]))) + i = endtag + 1 + continue + + # Everyone / Here + if text[i] == "@": + everyone_match = re.match(r"@(everyone)(?:[$\s\t\n\f\r\0]|$)", text[i:]) + if everyone_match: + nodes.append(EveryoneMentionNode()) + i += 9 # len("@everyone") + continue + here_match = re.match(r"@(here)(?:[$\s\t\n\f\r\0]|$)", text[i:]) + if here_match: + nodes.append(HereMentionNode()) + i += 5 # 
len("@here") + continue + + # Headers + if (i == 0 or text[i - 1] == "\n") and text[i] == "#": + level_match = re.match(r"^(#{1,3})\s+", text[i:]) + if level_match: + level = len(level_match.group(1)) + prefix_len = len(level_match.group(0)) + endtag = text.find("\n", i + prefix_len) + if endtag == -1: + endtag = n + nodes.append(HeaderNode(level, self._parse_inline(text[i + prefix_len : endtag]))) + # consume the trailing newline if present, DO NOT append it so we don't get
    + if endtag < n: + i = endtag + 1 + while i < n and text[i] == "\n": + i += 1 + else: + break + continue + + # Subtext + if (i == 0 or text[i - 1] == "\n") and text[i : i + 3] == "-# ": + endtag = text.find("\n", i + 3) + if endtag == -1: + endtag = n + nodes.append(SubtextNode(self._parse_inline(text[i + 3 : endtag]))) + if endtag < n: + nodes.append(TextNode("\n")) + i = endtag + 1 + else: + break + continue + + # Blockquote (>>>) + if (i == 0 or text[i - 1] == "\n") and text[i : i + 13] == ">>> ": + nodes.append(QuoteNode(self._parse_inline(text[i + 13 :]))) + break + + # Single line quote (>) + if (i == 0 or text[i - 1] == "\n") and (text[i : i + 5] == "> " or text[i : i + 4] == ">"): + prefix_len = 5 if text[i : i + 5] == "> " else 4 + endtag = text.find("\n", i + prefix_len) + if endtag == -1: + endtag = n + nodes.append(QuoteNode(self._parse_inline(text[i + prefix_len : endtag]))) + + if endtag < n: + i = endtag + 1 + continue + else: + break + + # Lists + if i == 0 or text[i - 1] == "\n": + list_match = re.match(r"^(\s*)([-*])\s+", text[i:]) + if list_match: + indent = len(list_match.group(1)) + prefix_len = len(list_match.group(0)) + endtag = text.find("\n", i + prefix_len) + if endtag == -1: + endtag = n + nodes.append(ListItemNode(indent, self._parse_inline(text[i + prefix_len : endtag]))) + if endtag < n: + i = endtag + 1 + else: + break + continue + + # Link [text](url) + if text[i] == "[": + close_bracket = text.find("](", i + 1) + if close_bracket != -1: + end_paren = text.find(")", close_bracket + 2) + if end_paren != -1: + link_text = text[i + 1 : close_bracket] + link_url = text[close_bracket + 2 : end_paren] + nodes.append(LinkNode(link_url, self._parse_inline(link_text))) + i = end_paren + 1 + continue + + # Raw HTTP + if text[i : i + 4] == "http": + match = re.search(r"^https?://[^\s<*\n\)]+", text[i:]) + if match: + url = match.group(0) + nodes.append(LinkNode(url, [TextNode(url)])) + i += len(url) + continue + + # Fallback consume 
characters to next special marker + next_special_options = [ + text.find("<", i + 1), + text.find("`", i + 1), + text.find("*", i + 1), + text.find("_", i + 1), + text.find("~", i + 1), + text.find("|", i + 1), + text.find("[", i + 1), + text.find("h", i + 1), + text.find("\n", i + 1), + text.find("&", i + 1), + text.find("#", i + 1), + text.find("-", i + 1), + text.find("@", i + 1), + ] + valid_specials = [pos for pos in next_special_options if pos != -1] + next_special = min(valid_specials) if valid_specials else n + + if next_special == i: # rare edge case fallback + nodes.append(TextNode(text[i])) + i += 1 + else: + raw_text = text[i:next_special] + # small mitigation for missing < inside url but we already parse urls + nodes.append(TextNode(raw_text)) + i = next_special + + return nodes + + def _merge_text_nodes(self, nodes: List[Node]) -> List[Node]: + merged = [] + for node in nodes: + if isinstance(node, TextNode): + node.text = node.text.replace("\n", "
    ") + if merged and isinstance(merged[-1], TextNode): + merged[-1].text += node.text + else: + merged.append(node) + else: + if isinstance(node, ContainerNode): + node.children = self._merge_text_nodes(node.children) + merged.append(node) + return merged + + def _merge_quote_nodes(self, nodes: List[Node]) -> List[Node]: + merged = [] + pending_spaces = [] + for node in nodes: + if isinstance(node, ContainerNode): + node.children = self._merge_quote_nodes(node.children) + + if isinstance(node, QuoteNode): + pending_spaces.clear() + if merged and isinstance(merged[-1], QuoteNode): + merged[-1].children.append(TextNode("
    ")) + merged[-1].children.extend(node.children) + else: + merged.append(node) + elif ( + merged + and isinstance(merged[-1], QuoteNode) + and isinstance(node, TextNode) + and node.text.replace("
    ", "").strip() == "" + ): + pending_spaces.append(node) + else: + if pending_spaces: + merged.extend(pending_spaces) + pending_spaces.clear() + merged.append(node) + if pending_spaces: + merged.extend(pending_spaces) + return merged + + def _merge_list_nodes(self, nodes: List[Node]) -> List[Node]: + merged = [] + current_list = [] + pending_spaces = [] + + def commit_list(): + if current_list: + merged.append(ListBlockNode(current_list.copy())) + current_list.clear() + if pending_spaces: + merged.extend(pending_spaces) + pending_spaces.clear() + + for node in nodes: + if isinstance(node, ContainerNode): + node.children = self._merge_list_nodes(node.children) + + if isinstance(node, ListItemNode): + pending_spaces.clear() + current_list.append(node) + elif current_list and isinstance(node, TextNode) and node.text.replace("
    ", "").strip() == "": + pending_spaces.append(node) + else: + commit_list() + merged.append(node) + + commit_list() + return merged diff --git a/chat_exporter/parse/markdown.py b/chat_exporter/parse/markdown.py index 69ce8d6..e81b449 100644 --- a/chat_exporter/parse/markdown.py +++ b/chat_exporter/parse/markdown.py @@ -1,57 +1,77 @@ -import html import re +from typing import Optional + +from chat_exporter.ext.discord_import import discord from chat_exporter.ext.emoji_convert import convert_emoji +from chat_exporter.parse.ast import AstParser + +bot: Optional[discord.Client] = None + + +def pass_bot(_bot): + # Bot is used to fetch a user who is no longer inside a guild + # This will stop the user from appearing as 'Unknown' which some people do not want + global bot + bot = _bot class ParseMarkdown: - def __init__(self, content): + def __init__(self, content, guild=None, _bot=None): self.content = content - self.code_blocks_content = [] + self.guild = guild + self.bot = _bot or bot + self.code_blocks = [] + + def parse_code_block_markdown(self): + # Shield multiline code blocks + def repl_multiline(match): + self.code_blocks.append(match.group(0)) + return f"{{{{CODE_BLOCK_{len(self.code_blocks) - 1}}}}}" + + self.content = re.sub(r"```.*?```", repl_multiline, self.content, flags=re.DOTALL) + + # Shield inline code + def repl_inline(match): + self.code_blocks.append(match.group(0)) + return f"{{{{CODE_BLOCK_{len(self.code_blocks) - 1}}}}}" + + self.content = re.sub(r"`.*?`", repl_inline, self.content, flags=re.DOTALL) + def reverse_code_block_markdown(self): + for i, block in enumerate(self.code_blocks): + self.content = self.content.replace(f"{{{{CODE_BLOCK_{i}}}}}", block) async def standard_message_flow(self): return await self.standard_embed_flow() async def link_embed_flow(self): - self.parse_embed_markdown() + ast = AstParser() + nodes = ast.parse(self.content) + self.content = "".join(n.render(self.guild, self.bot) for n in nodes) await self.parse_emoji() 
+ return self.content async def standard_embed_flow(self): - self.parse_code_block_markdown() - self.https_http_links() - self.parse_embed_markdown() - self.parse_normal_markdown() - + ast = AstParser() + nodes = ast.parse(self.content) + self.content = "".join(n.render(self.guild, self.bot) for n in nodes) await self.parse_emoji() - self.reverse_code_block_markdown() return self.content async def special_embed_flow(self): - self.https_http_links() - self.parse_code_block_markdown() - self.parse_normal_markdown() - - await self.parse_emoji() - self.reverse_code_block_markdown() - return self.content + return await self.standard_embed_flow() async def message_reference_flow(self): self.strip_preserve() - self.parse_code_block_markdown(reference=True) - self.https_http_links() - self.parse_embed_markdown() - self.parse_normal_markdown() - self.reverse_code_block_markdown() - self.parse_br() - - return self.content + return await self.standard_embed_flow() async def special_emoji_flow(self): await self.parse_emoji() return self.content - def parse_br(self): - self.content = self.content.replace("
    ", " ") + def strip_preserve(self): + p = r'(.*?)' + self.content = re.sub(p, r"\1", self.content) async def parse_emoji(self): holder = ( @@ -63,337 +83,12 @@ async def parse_emoji(self): self.content = await convert_emoji([word for word in self.content]) - for x in holder: - p, r = x - match = re.search(p, self.content) - while match is not None: - emoji_id = match.group(1) - self.content = self.content.replace(self.content[match.start():match.end()], - r % emoji_id) - match = re.search(p, self.content) - - def strip_preserve(self): - p = r'(.*)' - r = '%s' - - pattern = re.compile(p) - match = re.search(pattern, self.content) - while match is not None: - affected_text = match.group(1) - self.content = self.content.replace(self.content[match.start():match.end()], - r % affected_text) - match = re.search(pattern, self.content) - - def order_list_markdown_to_html(self): - lines = self.content.split('\n') - html = '' - indent_stack = [0] - started = True - - for line in lines: - match = re.match(r'^(\s*)([-*])\s+(.+)$', line) - if match: - indent, bullet, content = match.groups() - indent = len(indent) - - if started: - html += '
      \n' - started = False - if indent % 2 == 0: - while indent < indent_stack[-1]: - html += '
    \n' - indent_stack.pop() - if indent > indent_stack[-1]: - html += '
      \n' - indent_stack.append(indent) - else: - while indent + 1 < indent_stack[-1]: - html += '
    \n' - indent_stack.pop() - if indent + 1 > indent_stack[-1]: - html += '
      \n' - indent_stack.append(indent + 1) - - html += f'
    • {content.strip()}
    • \n' - else: - while len(indent_stack) > 1: - html += '
    ' - indent_stack.pop() - if not started: - html += '' - started = True - html += line + '\n' - - while len(indent_stack) > 1: - html += '\n' - indent_stack.pop() - - self.content = html - - def parse_normal_markdown(self): - self.order_list_markdown_to_html() - holder = ( - [r"__(.*?)__", '%s'], - [r"\*\*(.*?)\*\*", '%s'], - [r"\*(.*?)\*", '%s'], - [r"(?%s'], - [r"~~(.*?)~~", '%s'], - [r"^###\s(.*?)\n", '

    %s

    '], - [r"^##\s(.*?)\n", '

    %s

    '], - [r"^#\s(.*?)\n", '

    %s

    '], - [r"^-#\s(.*?)\n", '%s'], - [r"\|\|(.*?)\|\|", ' %s'], - ) - - for x in holder: - p, r = x - - pattern = re.compile(p, re.M) - match = re.search(pattern, self.content) - while match is not None: - affected_text = match.group(1) - self.content = self.content.replace(self.content[match.start():match.end()], r % affected_text) - match = re.search(pattern, self.content) - - # > quote (group consecutive lines into a single block so the bar spans them) - self.content = self.merge_quote_lines(self.content) - - def parse_code_block_markdown(self, reference=False): - markdown_languages = ["asciidoc", "autohotkey", "bash", "coffeescript", "cpp", "cs", "css", - "diff", "fix", "glsl", "ini", "json", "md", "ml", "prolog", "py", - "tex", "xl", "xml", "js", "html"] - self.content = re.sub(r"\n", "
    ", self.content) - - # ```code``` - pattern = re.compile(r"```(.*?)```") - match = re.search(pattern, self.content) - while match is not None: - language_class = "nohighlight" - affected_text = match.group(1) - - for language in markdown_languages: - if affected_text.lower().startswith(language): - language_class = f"language-{language}" - _, _, affected_text = affected_text.partition('
    ') - - affected_text = self.return_to_markdown(affected_text) - - second_pattern = re.compile(r"^
    |
    $") - second_match = re.search(second_pattern, affected_text) - while second_match is not None: - affected_text = re.sub(r"^
    |
    $", '', affected_text) - second_match = re.search(second_pattern, affected_text) - affected_text = re.sub(" ", "  ", affected_text) - - self.code_blocks_content.append(affected_text) - if not reference: - self.content = self.content.replace( - self.content[match.start():match.end()], - '
    %s
    ' % (language_class, f'%s{len(self.code_blocks_content)}') - ) - else: - self.content = self.content.replace( - self.content[match.start():match.end()], - '%s' % f'%s{len(self.code_blocks_content)}' - ) - - match = re.search(pattern, self.content) - - # ``code`` - pattern = re.compile(r"``(.*?)``") - match = re.search(pattern, self.content) - while match is not None: - affected_text = match.group(1) - affected_text = self.return_to_markdown(affected_text) - self.code_blocks_content.append(affected_text) - self.content = self.content.replace(self.content[match.start():match.end()], - '%s' % f'%s{len(self.code_blocks_content)}') - match = re.search(pattern, self.content) - - # `code` - pattern = re.compile(r"`(.*?)`") - match = re.search(pattern, self.content) - while match is not None: - affected_text = match.group(1) - affected_text = self.return_to_markdown(affected_text) - self.code_blocks_content.append(affected_text) - self.content = self.content.replace(self.content[match.start():match.end()], - '%s' % f'%s{len(self.code_blocks_content)}') - match = re.search(pattern, self.content) - - self.content = re.sub(r"
    ", "\n", self.content) - - def reverse_code_block_markdown(self): - for x in range(len(self.code_blocks_content)): - self.content = self.content.replace(f'%s{x + 1}', self.code_blocks_content[x]) - - def parse_embed_markdown(self): - # [Message](Link) - pattern = re.compile(r"\[(.+?)]\((https?://[^\s)]+)\)") - match = re.search(pattern, self.content) - while match is not None: - affected_text = match.group(1) - affected_url = match.group(2) - self.content = self.content.replace(self.content[match.start():match.end()], - '%s' % (affected_url, affected_text)) - match = re.search(pattern, self.content) - - self.content = self.merge_quote_lines(self.content) - - @staticmethod - def order_list_html_to_markdown(content): - lines = content.split('
    ') - html = '' - ul_level = -1 - - for line in lines: - if '
      ' in line: - ul_level += 1 - line = line.replace('
        ', '') - if line != "": - html += line + "\n" - elif "
      " in line: - ul_level -= 1 - elif '
    • ' in line: - match = re.match(r'
    • (.+?)
    • ', line) - if match: - matched_content = match.group(1) - spaces = ul_level * 2 - html += " " * spaces + "-" + matched_content + "\n" - else: - html += line - else: - html += line - - return html - - def return_to_markdown(self, content): - # content = self.order_list_html_to_markdown(content) - holders = ( - [r"(.*?)", '**%s**'], - [r"([^<>]+)", '*%s*'], - [r"

      ([^<>]+)

      ", '# %s'], - [r"

      ([^<>]+)

      ", '## %s'], - [r"

      ([^<>]+)

      ", '### %s'], - [r'([^<>]+)', '__%s__'], - [r'([^<>]+)', '~~%s~~'], - [r'
      (.*?)
      ', '> %s'], - [r' (.*?)<\/span><\/span>', '||%s||'], - [r'.*?', '%s'] - ) - - for x in holders: - p, r = x - - pattern = re.compile(p) - match = re.search(pattern, content) - while match is not None: - affected_text = match.group(1) - content = content.replace(content[match.start():match.end()], - r % html.escape(affected_text)) - match = re.search(pattern, content) - - pattern = re.compile(r'(.*?)') - match = re.search(pattern, content) - while match is not None: - affected_url = match.group(1) - affected_text = match.group(2) - if affected_url != affected_text: - content = content.replace(content[match.start():match.end()], - '[%s](%s)' % (affected_text, affected_url)) - else: - content = content.replace(content[match.start():match.end()], - '%s' % affected_url) - match = re.search(pattern, content) - - return content.lstrip().rstrip() - - @staticmethod - def merge_quote_lines(content: str) -> str: - """ - Convert consecutive blockquote-style lines into a single quote block so the visual bar spans all lines. - """ - lines = content.split("\n") - merged_content = [] - quote_buffer = [] - quote_pattern = re.compile(r"^(?:>|>)\s?(.*)") - - for line in lines: - match = quote_pattern.match(line) - if match: - quote_buffer.append(match.group(1)) - else: - if quote_buffer: - quote_text = "\n".join(quote_buffer) - merged_content.append(f'
      {quote_text}
      ') - quote_buffer = [] - merged_content.append(line) - - if quote_buffer: - quote_text = "\n".join(quote_buffer) - merged_content.append(f'
      {quote_text}
      ') - - merged = "\n".join(merged_content) - merged = re.sub(r"
    [ \t]*\n(?!\n)", "
    ", merged) - return merged - - def https_http_links(self): - def remove_silent_link(url, raw_url=None): - pattern = rf"`.*{raw_url}.*`" - match = re.search(pattern, self.content) - - if "<" in url and ">" in url and not match: - return url.replace("<", "").replace(">", "") - return url - - content = re.sub("\n", "
    ", self.content) - output = [] - if "http://" in content or "https://" in content: - for word in content.replace("
    ", "
    ").split(): + for p, r in holder: - # Skip markdown links to avoid wrapping the URL twice - if "](" in word: - output.append(word) - continue + def make_repl(template): + def repl(match): + return template % match.group(1) - if "http" not in word: - output.append(word) - continue + return repl - if "<" in word and ">" in word: - pattern = r"<https?:\/\/(.*)>" - match_url = re.search(pattern, word) - if match_url: - match_url = match_url.group(1) - url = f'https://{match_url}' - word = word.replace("https://" + match_url, url) - word = word.replace("http://" + match_url, url) - output.append(remove_silent_link(word, match_url)) - elif "https://" in word: - pattern = r"https://[^\s>`\"*]*" - word_link = re.search(pattern, word) - if word_link and word_link.group().endswith(")"): - output.append(word) - continue - elif word_link: - word_link = word_link.group() - word_full = f'{word_link}' - word = re.sub(pattern, word_full, word) - output.append(remove_silent_link(word)) - elif "http://" in word: - pattern = r"http://[^\s>`\"*]*" - word_link = re.search(pattern, word) - if word_link and word_link.group().endswith(")"): - output.append(word) - continue - elif word_link: - word_link = word_link.group() - word_full = f'{word_link}' - word = re.sub(pattern, word_full, word) - output.append(remove_silent_link(word)) - else: - output.append(word) - content = " ".join(output) - self.content = re.sub("
    ", "\n", content) + self.content = re.sub(p, make_repl(r), self.content) diff --git a/chat_exporter/parse/mention.py b/chat_exporter/parse/mention.py deleted file mode 100644 index 75b4d22..0000000 --- a/chat_exporter/parse/mention.py +++ /dev/null @@ -1,233 +0,0 @@ -import re -from typing import Optional - -import pytz -import datetime -import time - -from chat_exporter.ext.discord_import import discord -from chat_exporter.parse.markdown import ParseMarkdown -bot: Optional[discord.Client] = None - - -def pass_bot(_bot): - # Bot is used to fetch a user who is no longer inside a guild - # This will stop the user from appearing as 'Unknown' which some people do not want - global bot - bot = _bot - - -class ParseMention: - REGEX_ROLES = r"<@&([0-9]+)>" - REGEX_ROLES_2 = r"<@&([0-9]+)>" - REGEX_EVERYONE = r"@(everyone)(?:[$\s\t\n\f\r\0]|$)" - REGEX_HERE = r"@(here)(?:[$\s\t\n\f\r\0]|$)" - REGEX_MEMBERS = r"<@!?([0-9]+)>" - REGEX_MEMBERS_2 = r"<@!?([0-9]+)>" - REGEX_CHANNELS = r"<#([0-9]+)>" - REGEX_CHANNELS_2 = r"<#([0-9]+)>" - REGEX_EMOJIS = r"<a?(:[^\n:]+:)[0-9]+>" - REGEX_EMOJIS_2 = r"" - REGEX_TIME_HOLDER = ( - [r"<t:([0-9]{1,13}):t>", "%H:%M"], - [r"<t:([0-9]{1,13}):T>", "%T"], - [r"<t:([0-9]{1,13}):d>", "%d/%m/%Y"], - [r"<t:([0-9]{1,13}):D>", "%e %B %Y"], - [r"<t:([0-9]{1,13}):f>", "%e %B %Y %H:%M"], - [r"<t:([0-9]{1,13}):F>", "%A, %e %B %Y %H:%M"], - [r"<t:([0-9]{1,13}):R>", "%e %B %Y %H:%M"], - [r"<t:([0-9]{1,13})>", "%e %B %Y %H:%M"] - ) - REGEX_SLASH_COMMAND = r"<\/([\w]+ ?[\w]*):[0-9]+>" - CYCLE_SECONDS = 12_622_780_800 # Exactly 400 years in seconds - ESCAPE_LT = "______lt______" - ESCAPE_GT = "______gt______" - ESCAPE_AMP = "______amp______" - - def __init__(self, content, guild): - self.content = content - self.guild = guild - self.code_blocks_content = [] - - async def flow(self): - markdown = ParseMarkdown(self.content) - markdown.parse_code_block_markdown() - self.content = markdown.content - await self.escape_mentions() - await 
self.escape_mentions() - await self.unescape_mentions() - await self.channel_mention() - await self.member_mention() - await self.role_mention() - await self.time_mention() - await self.slash_command_mention() - markdown.content = self.content - markdown.reverse_code_block_markdown() - self.content = markdown.content - return self.content - - - async def escape_mentions(self): - content = "" - previous_match_end = 0 - for match in re.finditer("(%s|%s|%s|%s|%s|%s|%s|%s)" - % (self.REGEX_ROLES, self.REGEX_MEMBERS, self.REGEX_CHANNELS, self.REGEX_EMOJIS, - self.REGEX_ROLES_2, self.REGEX_MEMBERS_2, self.REGEX_CHANNELS_2, - self.REGEX_EMOJIS_2), self.content): - pre_content = self.content[previous_match_end:match.start()] - post_content = self.content[match.end():] - match_content = self.content[match.start():match.end()] - match_content = await self.escape_mention_starters(match_content) - - content += pre_content + match_content - previous_match_end = match.end() - if previous_match_end < len(self.content) - 1: - content += self.content[previous_match_end:] - self.content = content - - async def unescape_mentions(self, content: str = None): - had_content = content is not None - if content is None: - content = self.content - content = content.replace(self.ESCAPE_LT, "<") - content = content.replace(self.ESCAPE_GT, ">") - content = content.replace(self.ESCAPE_AMP, "&") - if not had_content: - self.content = content - return content - - async def escape_mention_starters(self, content: str = None): - had_content = content is not None - if content is None: - content = self.content - content = content.replace("<", self.ESCAPE_LT) - content = content.replace(">", self.ESCAPE_GT) - content = content.replace("&", self.ESCAPE_AMP) - if not had_content: - self.content = content - return content - - async def channel_mention(self): - holder = self.REGEX_CHANNELS, self.REGEX_CHANNELS_2 - for regex in holder: - match = re.search(regex, self.content) - while match is not None: - 
channel_id = int(match.group(1)) - channel = self.guild.get_channel(channel_id) - - if channel is None: - replacement = '#deleted-channel' - else: - replacement = '#%s' \ - % (channel.id, channel.name) - self.content = self.content.replace(self.content[match.start():match.end()], replacement) - - match = re.search(regex, self.content) - - async def role_mention(self): - holder = self.REGEX_EVERYONE, self.REGEX_HERE - for regex in holder: - match = re.search(regex, self.content) - while match is not None: - role_name = match.group(1) - replacement = '@%s' % (str(role_name), str(role_name)) - - self.content = self.content.replace(self.content[match.start():match.end()], - replacement) - match = re.search(regex, self.content) - holder = self.REGEX_ROLES, self.REGEX_ROLES_2 - for regex in holder: - match = re.search(regex, self.content) - while match is not None: - role_id = int(match.group(1)) - role = self.guild.get_role(role_id) - - if role is None: - replacement = '@deleted-role' - else: - if role.color.r == 0 and role.color.g == 0 and role.color.b == 0: - colour = "#dee0fc" - else: - colour = "#%02x%02x%02x" % (role.color.r, role.color.g, role.color.b) - replacement = '@%s' % (colour, role.name) - self.content = self.content.replace(self.content[match.start():match.end()], replacement) - match = re.search(regex, self.content) - - async def slash_command_mention(self): - match = re.search(self.REGEX_SLASH_COMMAND, self.content) - while match is not None: - slash_command_name = match.group(1) - replacement = ( - '/%s' - % (slash_command_name, slash_command_name) - ) - self.content = self.content.replace(self.content[match.start():match.end()], replacement) - - match = re.search(self.REGEX_SLASH_COMMAND, self.content) - - async def member_mention(self): - holder = self.REGEX_MEMBERS, self.REGEX_MEMBERS_2 - for regex in holder: - match = re.search(regex, self.content) - while match is not None: - member_id = int(match.group(1)) - - member = None - try: - member = 
self.guild.get_member(member_id) or bot.get_user(member_id) - member_name = member.display_name - except AttributeError: - member_name = member - member_name = await self.escape_mention_starters(member_name) - if member is not None: - replacement = '@%s' \ - % (str(member_id), str(member_name)) - else: - replacement = '<@%s>' \ - % (str(member_id), str(member_id)) - self.content = self.content.replace(self.content[match.start():match.end()], - replacement) - - match = re.search(regex, self.content) - - await self.unescape_mentions() - - async def time_mention(self): - holder = self.REGEX_TIME_HOLDER - - for p in holder: - regex, strf = p - match = re.search(regex, self.content) - while match is not None: - timestamp = int(match.group(1)) - 1 - try: - time_stamp = time.gmtime(timestamp) - datetime_stamp = datetime.datetime(2010, *time_stamp[1:6], tzinfo=pytz.utc) - ui_time = datetime_stamp.strftime(strf) - ui_time = ui_time.replace(str(datetime_stamp.year), str(time_stamp[0])) - tooltip_time = datetime_stamp.strftime("%A, %e %B %Y at %H:%M") - tooltip_time = tooltip_time.replace(str(datetime_stamp.year), str(time_stamp[0])) - except (OSError, OverflowError, ValueError): - # overflow error occurs when timestamp is too large, manual parsing - # Project the timestamp into a safe range that doesn't cause issues with system or python limitations - # Strip out 400-year chunks until the timestamp fits in Python's logic - safe_ts = timestamp % self.CYCLE_SECONDS - years_shifted = (timestamp // self.CYCLE_SECONDS) * 400 - # Create the datetime object using the safe timestamp - dt = datetime.datetime.fromtimestamp(safe_ts, pytz.utc) - # Format normally, but inject the real year calculation - final_year = dt.year + years_shifted - ui_time = dt.strftime(strf) - ui_time = ui_time.replace(str(dt.year), str(final_year)) - tooltip_time = dt.strftime("%A, %e %B %Y at %H:%M") - tooltip_time = tooltip_time.replace(str(dt.year), str(final_year)) - original = match.group().replace("<", 
"<").replace(">", ">") - replacement = ( - f'' - f'{ui_time}' - ) - - self.content = self.content.replace(self.content[match.start():match.end()], - replacement) - - match = re.search(regex, self.content) diff --git a/pyproject.toml b/pyproject.toml index 7632de7..fc48dbf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "chat_exporter" description = "A simple Discord chat exporter for Python Discord bots." -version = "3.0.1" +version = "3.1.0" readme = "README.md" authors = [ { name="mahtoid", email="info@mahto.id" } @@ -32,6 +32,36 @@ classifiers = [ dependencies = ["aiohttp", "pytz", "grapheme", "emoji"] keywords = ["chat exporter", "discord chat exporter", "discord", "discordpy", "disnake", "pycord", "nextcord"] +[project.optional-dependencies] +dev = [ + "ruff", + "pytest", + "pytest-asyncio", +] + +[tool.ruff] +# Target version for the generated code. +target-version = "py311" +line-length = 120 + +[tool.ruff.lint] + +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Also enable security rules (`S`) and isort (`I`). +select = ["E4", "E7", "E9", "F", "S", "I"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix`) is provided. 
+fixable = ["ALL"] +unfixable = [] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" + + [project.urls] Homepage = "https://github.com/mahtoid/DiscordChatExporterPy" Discord = "https://discord.mahto.id/" diff --git a/tests/test_ast.py b/tests/test_ast.py new file mode 100644 index 0000000..ef2073d --- /dev/null +++ b/tests/test_ast.py @@ -0,0 +1,156 @@ +import unittest + +from chat_exporter.parse.ast import AstParser + + +class TestAST(unittest.TestCase): + def setUp(self): + self.parser = AstParser() + + def test_basic_formatting(self): + text = "**Bold** and *Italic* and __Underline__ and ~~Strikethrough~~" + nodes = self.parser.parse(text) + out = "".join(n.render() for n in nodes) + self.assertEqual( + out, + ( + 'Bold and Italic and ' + 'Underline and ' + 'Strikethrough' + ), + ) + + + + def test_inline_code(self): + text = "This is `inline code`" + nodes = self.parser.parse(text) + out = "".join(n.render() for n in nodes) + self.assertEqual(out, 'This is inline code') + + def test_code_block(self): + text = "```python\nprint('hello')\n```" + nodes = self.parser.parse(text) + out = "".join(n.render() for n in nodes) + self.assertEqual(out, "
    print('hello')
    ") + + def test_single_line_quote(self): + # Using html escaped > for Discord's quote logic + text = "> Quote 1\n> Quote 2" + nodes = self.parser.parse(text) + out = "".join(n.render() for n in nodes) + self.assertEqual( + out, + '
    Quote 1
    Quote 2
    ', + ) + + def test_multiline_blockquote(self): + text = ">>> Multiline\nQuote\nBlock" + nodes = self.parser.parse(text) + out = "".join(n.render() for n in nodes) + self.assertEqual( + out, + '
    Multiline
    Quote
    Block
    ', + ) + + def test_heading_newline_stripping(self): + text = "# Heading\n\n\n\nTest" + nodes = self.parser.parse(text) + out = "".join(n.render() for n in nodes) + self.assertEqual(out, "

    Heading

    Test") + + def test_subtext_newline_preservation(self): + text = "-# And some subtext!\nTest" + nodes = self.parser.parse(text) + out = "".join(n.render() for n in nodes) + self.assertEqual(out, "And some subtext!
    Test") + + def test_combined_edge_cases(self): + # Simulate the full test message as Discord would escape it + text = ( + "> Blockquote line 1\n> Blockquote line 2\n>>> Multiline\nQuote\nBlock\n-# And some subtext!" + ) + nodes = self.parser.parse(text) + out = "".join(n.render() for n in nodes) + + # The two single-line quotes should be merged into one quote block + self.assertIn("Blockquote line 1
    Blockquote line 2", out) + # The >>> multiline quote should consume the rest (including subtext) + self.assertIn("Multiline
    Quote
    Block", out) + # Subtext should be rendered in the multiline quote + self.assertIn("And some subtext!", out) + # All content should be inside quote wrappers + self.assertIn('
    ', out) + self.assertIn('
    ', out) + + def test_mentions(self): + # Mock guild and roles/channels + class MockRole: + def __init__(self, id, name, r, g, b): + self.id = id + self.name = name + self.color = type('obj', (object,), {'r': r, 'g': g, 'b': b}) + + class MockChannel: + def __init__(self, id, name): + self.id = id + self.name = name + + class MockMember: + def __init__(self, id, display_name): + self.id = id + self.display_name = display_name + + class MockGuild: + def get_role(self, id): + if id == 123: + return MockRole(123, "TestRole", 255, 0, 0) + return None + def get_channel(self, id): + if id == 456: + return MockChannel(456, "test-channel") + return None + def get_member(self, id): + if id == 789: + return MockMember(789, "TestMember") + return None + + guild = MockGuild() + + # Test Channel Mention + text = "<#456> and <#456>" + nodes = self.parser.parse(text) + out = "".join(n.render(guild) for n in nodes) + self.assertIn('#test-channel', out) + self.assertEqual(out.count('#test-channel'), 2) + + # Test Role Mention + text = "<@&123> and <@&123>" + nodes = self.parser.parse(text) + out = "".join(n.render(guild) for n in nodes) + self.assertIn('@TestRole', out) + self.assertIn('color: #ff0000', out) + + # Test Member Mention + text = "<@789> and <@!789>" + nodes = self.parser.parse(text) + out = "".join(n.render(guild) for n in nodes) + self.assertIn('@TestMember', out) + + # Test Everyone/Here + text = "@everyone and @here" + nodes = self.parser.parse(text) + out = "".join(n.render(guild) for n in nodes) + self.assertIn('@everyone', out) + self.assertIn('@here', out) + + # Test Time Mention + text = "" + nodes = self.parser.parse(text) + out = "".join(n.render(guild) for n in nodes) + self.assertIn('unix-timestamp', out) + self.assertIn('raw-content="<t:1614556800:R>"', out) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_export.py b/tests/test_export.py new file mode 100644 index 0000000..c9fa0d9 --- /dev/null +++ b/tests/test_export.py @@ 
-0,0 +1,449 @@ +""" +Integration tests for chat_exporter using mocked Discord objects. + +These tests exercise the full rendering pipeline (raw_export) without +needing a live Discord connection. A small HTML artifact is saved to +tests/artifacts/ for visual inspection if needed. +""" + +import asyncio +import os +import unittest +from datetime import datetime +from unittest.mock import MagicMock + +import pytz + +import chat_exporter +from chat_exporter.ext.discord_import import discord + +ARTIFACTS_DIR = os.path.join(os.path.dirname(__file__), "artifacts") + + +def _make_guild(name="Test Guild", guild_id=111111111111111111): + guild = MagicMock(spec=discord.Guild) + guild.id = guild_id + guild.name = name + guild.icon = None + guild.get_channel.return_value = None + guild.get_member.return_value = None + guild.get_role.return_value = None + guild.timezone = "UTC" + return guild + + +def _make_author(name="testuser", user_id=222222222222222222, bot=False): + author = MagicMock(spec=discord.Member) + author.id = user_id + author.name = name + author.discriminator = "0" + author.display_name = name + author.display_avatar = None + author.display_icon = None + author.top_role = None + author.colour = "#FFFFFF" + author.bot = bot + author.public_flags.verified_bot = False + author.created_at = datetime(2020, 1, 1, tzinfo=pytz.utc) + author.joined_at = datetime(2021, 1, 1, tzinfo=pytz.utc) + return author + + +def _make_message(content="", msg_id=1, created_at=None, author=None, guild=None): + msg = MagicMock() + msg.id = msg_id + msg.type = MagicMock() + msg.type.__eq__ = lambda self, other: False # not a special message type + msg.content = content + msg.created_at = created_at or datetime(2024, 1, 1, 12, 0, 0, tzinfo=pytz.utc) + msg.edited_at = None + msg.author = author or _make_author() + msg.reference = None + msg.attachments = [] + msg.embeds = [] + msg.reactions = [] + msg.components = [] + msg.stickers = [] + msg.interaction = None + msg.message_snapshots = [] 
+ msg.channel = MagicMock() + msg.channel.type = MagicMock() + msg.channel.type.__str__ = lambda self: "text" + msg.channel.guild = guild or _make_guild() + return msg + + +def _make_channel(guild=None): + ch = MagicMock() + ch.name = "test-channel" + ch.id = 333333333333333333 + ch.topic = None + ch.guild = guild or _make_guild() + ch.type = MagicMock() + ch.type.__str__ = lambda self: "text" + ch.created_at = datetime(2019, 1, 1, tzinfo=pytz.utc) + return ch + + +def _make_embed( + title=None, + description=None, + colour=0x5B8DEF, + fields=None, + author_name=None, + author_icon=None, + author_url=None, + footer_text=None, + footer_icon=None, + image_url=None, + thumbnail_url=None, + timestamp=None, + url=None, +): + embed = MagicMock(spec=discord.Embed) + embed.title = title + embed.description = description + embed.colour = MagicMock() + embed.colour.r = (colour >> 16) & 0xFF + embed.colour.g = (colour >> 8) & 0xFF + embed.colour.b = colour & 0xFF + embed.colour.__ne__ = lambda s, o: True + + embed.fields = [] + if fields: + for f in fields: + field = MagicMock() + field.name = f.get("name") + field.value = f.get("value") + field.inline = f.get("inline", False) + embed.fields.append(field) + + embed.author = MagicMock() + embed.author.name = author_name + embed.author.icon_url = author_icon + embed.author.url = author_url + + embed.footer = MagicMock() + embed.footer.text = footer_text + embed.footer.icon_url = footer_icon + + embed.image = MagicMock() + embed.image.url = image_url + embed.image.proxy_url = image_url + + embed.thumbnail = MagicMock() + embed.thumbnail.url = thumbnail_url + embed.thumbnail.proxy_url = thumbnail_url + + embed.timestamp = timestamp + embed.url = url + return embed + + +def _run(coro): + return asyncio.get_event_loop().run_until_complete(coro) + + +class TestRawExport(unittest.TestCase): + def setUp(self): + self.guild = _make_guild() + self.channel = _make_channel(guild=self.guild) + os.makedirs(ARTIFACTS_DIR, exist_ok=True) + + 
def _export(self, messages, filename=None, channel=None, guild=None): + channel = channel or self.channel + guild = guild or self.guild + html = _run( + chat_exporter.raw_export( + channel=channel, + messages=messages, + tz_info="UTC", + military_time=True, + guild=guild, + ) + ) + if filename and html: + path = os.path.join(ARTIFACTS_DIR, filename) + with open(path, "w", encoding="utf-8") as f: + f.write(html) + return html + + def test_plain_message_renders(self): + """A simple plain-text message should appear in the exported HTML.""" + msg = _make_message("Hello, world!", guild=self.guild) + html = self._export([msg], "plain_message.html") + self.assertIsNotNone(html) + self.assertIn("Hello, world!", html) + + def test_bold_formatting_in_message(self): + """Bold markdown in a message should render as tags.""" + msg = _make_message("This is **bold** text", guild=self.guild) + html = self._export([msg], "bold_formatting.html") + self.assertIn("bold", html) + + def test_heading_in_message(self): + """Heading markdown should render as

    tags.""" + msg = _make_message("# Big Title\nSome text below.", guild=self.guild) + html = self._export([msg], "heading.html") + self.assertIn("

    Big Title

    ", html) + + def test_blockquote_in_message(self): + """Blockquote markdown should render inside a .quote div.""" + msg = _make_message("> This is a quote", guild=self.guild) + html = self._export([msg], "blockquote.html") + self.assertIn('class="quote"', html) + self.assertIn("This is a quote", html) + + def test_subtext_in_message(self): + """Subtext -# should render as a tag.""" + msg = _make_message("-# small print here", guild=self.guild) + html = self._export([msg], "subtext.html") + self.assertIn("small print here", html) + + def test_multiple_messages(self): + """Multiple messages should all appear in the final export.""" + msgs = [ + _make_message("First message", msg_id=1, guild=self.guild), + _make_message("Second message", msg_id=2, guild=self.guild), + _make_message("Third message", msg_id=3, guild=self.guild), + ] + html = self._export(msgs, "multiple_messages.html") + self.assertIn("First message", html) + self.assertIn("Second message", html) + self.assertIn("Third message", html) + + def test_bot_message(self): + """Bot-authored messages should include a bot tag.""" + author = _make_author(name="MyBot", bot=True) + msg = _make_message("I am a bot!", author=author, guild=self.guild) + html = self._export([msg], "bot_message.html") + self.assertIn("I am a bot!", html) + self.assertIn("bot-tag", html) + + def test_heading_no_trailing_breaks(self): + """Headings followed by blank lines should not produce extra
    before the next text.""" + msg = _make_message("# Title\n\n\n\nContent below", guild=self.guild) + html = self._export([msg], "heading_no_breaks.html") + # The heading should render, then Content without piles of
    between them + self.assertIn("

    Title

    ", html) + self.assertIn("Content below", html) + + def test_mention_display_name_like_mention_no_infinite_loop(self): + """ + Regression test for GitHub issue #151. + A user whose display_name is itself formatted as a mention (e.g. '<@999999999>') + should not cause an infinite loop. The export should complete quickly. + """ + import asyncio + + # Mock a member whose display_name looks like a mention + malicious_member = MagicMock(spec=discord.Member) + malicious_member.id = 1234567890 + malicious_member.display_name = "<@999999999>" + malicious_member.display_avatar = None + malicious_member.display_icon = None + malicious_member.top_role = None + malicious_member.colour = "#FFFFFF" + + guild = _make_guild() + guild.get_member.return_value = malicious_member + + # The message content contains a mention of that member + msg = _make_message("<@1234567890> said hello", guild=guild) + msg.channel.guild = guild + + # This should complete without hanging — if it loops infinitely the test will timeout + try: + result = _run( + asyncio.wait_for( + chat_exporter.raw_export(channel=self.channel, messages=[msg], guild=guild), timeout=5.0 + ) + ) + # Export should produce some HTML + self.assertIsNotNone(result) + except asyncio.TimeoutError: + self.fail("raw_export timed out — possible infinite loop regression (issue #151)") + + def test_embed_with_member_mention_in_description(self): + """An embed description containing a member mention should render the mention span correctly.""" + member = MagicMock(spec=discord.Member) + member.id = 987654321 + member.display_name = "TestUser" + member.display_avatar = None + member.display_icon = None + member.top_role = None + member.colour = "#FFFFFF" + + guild = _make_guild() + guild.get_member.return_value = member + channel = _make_channel(guild=guild) + + # Build a realistic embed where None-sentinel fields really are None + embed = MagicMock( + spec=[ + "colour", + "title", + "description", + "fields", + "author", + "image", 
+ "thumbnail", + "footer", + "timestamp", + "url", + ] + ) + embed.colour = MagicMock() + embed.colour.r, embed.colour.g, embed.colour.b = 0x5B, 0x8D, 0xEF + embed.colour.__ne__ = lambda s, o: True + embed.title = None + embed.description = "<@987654321> is mentioned here" + embed.fields = [] + embed.author = MagicMock() + embed.author.name = None + embed.author.url = None + embed.author.icon_url = None + embed.image = MagicMock() + embed.image.url = None + embed.thumbnail = MagicMock() + embed.thumbnail.url = None + embed.footer = MagicMock() + embed.footer.text = None + embed.footer.icon_url = None + embed.timestamp = None + embed.url = None + + msg = _make_message("", guild=guild) + msg.embeds = [embed] + msg.channel.guild = guild + + html = self._export([msg], "embed_mention.html", channel=channel, guild=guild) + self.assertIsNotNone(html) + # The mention should appear as a mention span, not as raw escaped HTML + self.assertIn('class="mention"', html) + self.assertIn("TestUser", html) + + def test_all_mentions_and_timestamps(self): + """Test that all mention types and timestamps render correctly in a full export.""" + # Mock role + role = MagicMock(spec=discord.Role) + role.id = 111 + role.name = "TestRole" + role.color.r, role.color.g, role.color.b = 255, 0, 0 + + # Mock channel + channel_mention = MagicMock(spec=discord.TextChannel) + channel_mention.id = 222 + channel_mention.name = "mentioned-channel" + + # Mock member + member = MagicMock(spec=discord.Member) + member.id = 333 + member.display_name = "MentionedUser" + member.display_avatar = None + member.display_icon = None + member.top_role = None + member.colour = "#FFFFFF" + + guild = _make_guild() + guild.get_role.side_effect = lambda id: role if id == 111 else None + guild.get_channel.side_effect = lambda id: channel_mention if id == 222 else None + guild.get_member.side_effect = lambda id: member if id == 333 else None + + content = ( + "Role: <@&111>\n" + "Channel: <#222>\n" + "User: <@333>\n" + 
"Everyone: @everyone\n" + "Here: @here\n" + "Timestamp: " + ) + msg = _make_message(content, guild=guild) + html = self._export([msg], "all_mentions.html", guild=guild) + + self.assertIn("@TestRole", html) + self.assertIn("color: #ff0000", html) + self.assertIn("#mentioned-channel", html) + self.assertIn("@MentionedUser", html) + self.assertIn("@everyone", html) + self.assertIn("@here", html) + self.assertIn("unix-timestamp", html) + + def test_complex_embed_rendering(self): + """Test a very complex embed with all fields populated.""" + guild = _make_guild() + member = MagicMock(spec=discord.Member) + member.id = 123 + member.display_name = "EmbedExpert" + member.display_avatar = None + member.display_icon = None + member.top_role = None + member.colour = "#FF5733" + guild.get_member.return_value = member + + embed = _make_embed( + title="Complex Title", + description="This is a description with a mention <@123> and a link [Google](https://google.com)", + colour=0x00FF00, + fields=[ + {"name": "Field 1", "value": "Value 1", "inline": True}, + {"name": "Field 2", "value": "Value 2", "inline": True}, + {"name": "Field 3", "value": "Value 3", "inline": False}, + ], + author_name="Author Name", + author_icon="https://example.com/author.png", + author_url="https://example.com/author", + footer_text="Footer Text", + footer_icon="https://example.com/footer.png", + image_url="https://example.com/image.png", + thumbnail_url="https://example.com/thumbnail.png", + timestamp=datetime(2024, 1, 1, 12, 0, 0, tzinfo=pytz.utc), + url="https://example.com/embed", + ) + + msg = _make_message("Message with complex embed", guild=guild) + msg.embeds = [embed] + html = self._export([msg], "complex_embed.html", guild=guild) + + self.assertIn("Complex Title", html) + self.assertIn("@EmbedExpert", html) + self.assertIn("Value 1", html) + self.assertIn("Value 3", html) + self.assertIn("Author Name", html) + self.assertIn("Footer Text", html) + self.assertIn("https://example.com/image.png", 
html) + self.assertIn("https://example.com/thumbnail.png", html) + + def test_message_with_multiple_embeds_and_attachments(self): + """Test a message that has multiple embeds and multiple attachments.""" + guild = _make_guild() + + embed1 = _make_embed(title="Embed 1", description="Description 1") + embed2 = _make_embed(title="Embed 2", description="Description 2") + + att1 = MagicMock(spec=discord.Attachment) + att1.url = "https://example.com/file1.png" + att1.proxy_url = "https://example.com/file1.png" + att1.filename = "file1.png" + att1.size = 1048576 + att1.content_type = "image/png" + att1.is_spoiler.return_value = False + + att2 = MagicMock(spec=discord.Attachment) + att2.url = "https://example.com/file2.txt" + att2.proxy_url = "https://example.com/file2.txt" + att2.filename = "file2.txt" + att2.size = 1024 + att2.content_type = "text/plain" + att2.is_spoiler.return_value = False + + msg = _make_message("Multiple things here", guild=guild) + msg.embeds = [embed1, embed2] + msg.attachments = [att1, att2] + + html = self._export([msg], "multiple_things.html", guild=guild) + + self.assertIn("Embed 1", html) + self.assertIn("Embed 2", html) + self.assertIn("file1.png", html) + self.assertIn("file2.txt", html) From e8b13a3c6d3518a984e8d2d501fbe25a6d7c4d15 Mon Sep 17 00:00:00 2001 From: mahtoid Date: Sun, 22 Mar 2026 12:02:23 +0000 Subject: [PATCH 2/8] Fix to test workflow --- .gitignore | 3 +++ pyproject.toml | 1 + 2 files changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 8bf9cf5..6061900 100644 --- a/.gitignore +++ b/.gitignore @@ -171,5 +171,8 @@ chat_exporter/config.py # Tests tests/artifacts +# Bloat +.ruff_cache/ + # macOS .DS_Store \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index fc48dbf..4e7a442 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dev = [ "ruff", "pytest", "pytest-asyncio", + "discord.py", ] [tool.ruff] From cb3a8d147b7e99d32cd19b4a1ceb26c837afb1f5 Mon Sep 17 00:00:00 2001 From: 
mahtoid Date: Sun, 22 Mar 2026 13:22:17 +0000 Subject: [PATCH 3/8] Comment + Improvements to 'Forwarded' message --- chat_exporter/construct/assets/attachment.py | 4 +-- chat_exporter/construct/message.py | 31 +++++++++++++------ chat_exporter/ext/html_generator.py | 2 +- chat_exporter/html/base.html | 21 ++++++++++++- chat_exporter/html/message/forwarded.html | 6 ++++ .../html/message/reference_forwarded.html | 3 -- 6 files changed, 51 insertions(+), 16 deletions(-) create mode 100644 chat_exporter/html/message/forwarded.html delete mode 100644 chat_exporter/html/message/reference_forwarded.html diff --git a/chat_exporter/construct/assets/attachment.py b/chat_exporter/construct/assets/attachment.py index 0ac23b3..1b00ee6 100644 --- a/chat_exporter/construct/assets/attachment.py +++ b/chat_exporter/construct/assets/attachment.py @@ -221,8 +221,8 @@ def _mark_spoiler(self): return replacements = ( - ("
    ", '
    '), - ('class="chatlog__attachment"', 'class="chatlog__attachment chatlog__attachment-spoiler"'), + ("
    ", "
    "), + ("class='chatlog__attachment'", "class='chatlog__attachment chatlog__attachment-spoiler'"), ) for target, replacement in replacements: diff --git a/chat_exporter/construct/message.py b/chat_exporter/construct/message.py index ebfbb66..41ecba3 100644 --- a/chat_exporter/construct/message.py +++ b/chat_exporter/construct/message.py @@ -24,7 +24,7 @@ message_interaction, message_pin, message_reference, - message_reference_forwarded, + message_forwarded, message_reference_unknown, message_thread, message_thread_add, @@ -50,6 +50,7 @@ class MessageConstruct: # Asset Types embeds: str = "" + forwarded_embeds: str = "" reactions: str = "" components: str = "" attachments: str = "" @@ -144,6 +145,7 @@ async def build_message(self): await self.build_interaction() await self.build_sticker() await self.build_assets() + await self.wrap_forwarded() await self.build_message_template() await self.build_meta_data() @@ -210,8 +212,6 @@ async def build_content(self): combined = html.escape(combined or "") - if self.forwarded: - combined = f'
    {combined}
    ' self.message.content = await fill_out( self.guild, @@ -235,7 +235,7 @@ async def build_reference(self): except (discord.NotFound, discord.HTTPException) as e: self.message.reference = "" if self.forwarded: - self.message.reference = message_reference_forwarded + self.message.reference = "" return if isinstance(e, discord.NotFound): self.message.reference = message_reference_unknown @@ -332,7 +332,7 @@ async def build_sticker(self): for snapshot in self.get_message_snapshots(): if hasattr(snapshot, "stickers") and snapshot.stickers and hasattr(snapshot.stickers[0], "url"): sticker_image_url = snapshot.stickers[0].url - self.message.reference = message_reference_forwarded + self.message.reference = "" break if not sticker_image_url: @@ -366,8 +366,8 @@ async def build_assets(self): for snapshot in self.get_message_snapshots(): if hasattr(snapshot, "embeds"): for se in snapshot.embeds: - self.embeds += await Embed(se, self.guild).flow() - self.message.reference = message_reference_forwarded + self.forwarded_embeds += await Embed(se, self.guild).flow() + self.message.reference = "" for a in self.message.attachments: if self.attachment_handler and isinstance(self.attachment_handler, AttachmentHandler): @@ -389,7 +389,7 @@ async def build_assets(self): if self.attachment_handler: sa = await self.attachment_handler.process_asset(sa) self.attachments += await Attachment(sa, self.guild).flow() - self.message.reference = message_reference_forwarded + self.message.reference = "" for c in self.message.components: self.components += await Component(c, self.guild, self.message.attachments).flow() @@ -398,7 +398,7 @@ async def build_assets(self): if hasattr(snapshot, "components"): for ac in snapshot.components: self.components += await Component(ac, self.guild).flow() - self.message.reference = message_reference_forwarded + self.message.reference = "" for r in self.message.reactions: self.reactions += await Reaction(r, self.guild).flow() @@ -406,6 +406,19 @@ async def 
build_assets(self): if self.reactions: self.reactions = f'
    {self.reactions}
    ' + async def wrap_forwarded(self): + if not self.forwarded: + return + + self.message.content = ( + f'
    {message_forwarded}{self.message.content}' + f"{self.attachments}{self.forwarded_embeds}{self.components}
    " + ) + + self.attachments = "" + self.forwarded_embeds = "" + self.components = "" + async def build_message_template(self): started = await self.generate_message_divider() diff --git a/chat_exporter/ext/html_generator.py b/chat_exporter/ext/html_generator.py index f500c8a..e2ea584 100644 --- a/chat_exporter/ext/html_generator.py +++ b/chat_exporter/ext/html_generator.py @@ -73,7 +73,7 @@ def read_file(filename): message_thread_remove = read_file(dir_path + "/html/message/thread_remove.html") message_thread_add = read_file(dir_path + "/html/message/thread_add.html") message_reference_unknown = read_file(dir_path + "/html/message/reference_unknown.html") -message_reference_forwarded = read_file(dir_path + "/html/message/reference_forwarded.html") +message_forwarded = read_file(dir_path + "/html/message/forwarded.html") message_body = read_file(dir_path + "/html/message/message.html") end_message = read_file(dir_path + "/html/message/end.html") meta_data_temp = read_file(dir_path + "/html/message/meta.html") diff --git a/chat_exporter/html/base.html b/chat_exporter/html/base.html index 2c91e29..d731c63 100644 --- a/chat_exporter/html/base.html +++ b/chat_exporter/html/base.html @@ -163,7 +163,7 @@ --quote-bar-color: #4e5058; display: flex; align-items: flex-start; - max-width: 90%; + max-width: 610px; margin: 0; padding: 0.05em 0; color: #dbdee1; @@ -182,6 +182,25 @@ align-self: stretch; } + .chatlog__forwarded-header { + display: flex; + align-items: center; + gap: 4px; + margin-bottom: 4px; + color: #b5b6b8; + font-size: 0.8125rem; + font-weight: 600; + user-select: none; + } + + .chatlog__forwarded-icon { + flex-shrink: 0; + } + + .chatlog__forwarded-text { + color: #b5b6b8; + } + .pre { font-family: "Consolas", "Courier New", Courier, monospace; padding: .25em .45em; diff --git a/chat_exporter/html/message/forwarded.html b/chat_exporter/html/message/forwarded.html new file mode 100644 index 0000000..1a10225 --- /dev/null +++ 
b/chat_exporter/html/message/forwarded.html @@ -0,0 +1,6 @@ +
    + +
    Forwarded
    +
    diff --git a/chat_exporter/html/message/reference_forwarded.html b/chat_exporter/html/message/reference_forwarded.html deleted file mode 100644 index be0f9ad..0000000 --- a/chat_exporter/html/message/reference_forwarded.html +++ /dev/null @@ -1,3 +0,0 @@ -
    - Forwarded message. -
    \ No newline at end of file From e206f3d667861addb46a7757d9df5f43fab59390 Mon Sep 17 00:00:00 2001 From: mahtoid Date: Sun, 22 Mar 2026 13:43:01 +0000 Subject: [PATCH 4/8] Workflow Discord Integration --- .github/workflows/integration-test.yml | 34 ++++++++++++++ .github/workflows/lint.yml | 2 - .github/workflows/test.yml | 2 - tests/integration_test_discord.py | 65 ++++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/integration-test.yml create mode 100644 tests/integration_test_discord.py diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml new file mode 100644 index 0000000..eea0bfc --- /dev/null +++ b/.github/workflows/integration-test.yml @@ -0,0 +1,34 @@ +name: integration-test + +on: + workflow_dispatch + +jobs: + integration-test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Run Discord Integration Test + env: + DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }} + run: python tests/integration_test_discord.py + + - name: Upload integration artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: integration-transcript + path: tests/artifacts/integration_transcript.html + if-no-files-found: error diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c283f90..bcfbe46 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -3,8 +3,6 @@ name: lint on: push: branches: [ "**" ] - pull_request: - branches: [ "**" ] jobs: ruff: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8571bd6..0ef7ed5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,8 +3,6 @@ name: test on: push: branches: [ "**" ] - pull_request: - branches: [ "**" ] 
jobs: test: diff --git a/tests/integration_test_discord.py b/tests/integration_test_discord.py new file mode 100644 index 0000000..355103c --- /dev/null +++ b/tests/integration_test_discord.py @@ -0,0 +1,65 @@ +import os +import asyncio +import discord +import chat_exporter +import sys + +# Configuration from environment variables +TOKEN = os.getenv("DISCORD_TOKEN") +GUILD_ID = 715959114647208017 +CHANNEL_ID = 1456616358681772135 + +if not TOKEN: + print("Error: DISCORD_TOKEN environment variable is not set.") + sys.exit(1) + +intents = discord.Intents.default() +intents.message_content = True +intents.members = True + +bot = discord.Client(intents=intents) + +@bot.event +async def on_ready(): + print(f"Logged in as {bot.user}") + + guild = bot.get_guild(GUILD_ID) + if not guild: + print(f"Error: Could not find guild with ID {GUILD_ID}") + await bot.close() + sys.exit(1) + + channel = guild.get_channel(CHANNEL_ID) + if not channel: + print(f"Error: Could not find channel with ID {CHANNEL_ID} in guild {GUILD_ID}") + await bot.close() + sys.exit(1) + + print(f"Exporting channel: {channel.name} ({channel.id})") + + try: + transcript = await chat_exporter.export( + channel, + bot=bot, + ) + + if transcript: + os.makedirs("tests/artifacts", exist_ok=True) + with open("tests/artifacts/integration_transcript.html", "w", encoding="utf-8") as f: + f.write(transcript) + print("Successfully saved transcript to tests/artifacts/integration_transcript.html") + else: + print("Error: Export returned empty transcript.") + await bot.close() + sys.exit(1) + + except Exception as e: + print(f"Error during export: {e}") + await bot.close() + sys.exit(1) + + print("Integration test completed successfully.") + await bot.close() + +if __name__ == "__main__": + bot.run(TOKEN) From 9f324f53198854f44ac661d5b231bed691c29b84 Mon Sep 17 00:00:00 2001 From: Matthew Moss <56257224+mahtoid@users.noreply.github.com> Date: Sun, 22 Mar 2026 13:44:39 +0000 Subject: [PATCH 5/8] Add push trigger to 
integration test workflow --- .github/workflows/integration-test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index eea0bfc..1fe88e3 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -1,7 +1,8 @@ name: integration-test on: - workflow_dispatch + push: + branches: [ "**" ] jobs: integration-test: From 1b535f0913ee2a90cfab8e63a247a3c12a68bced Mon Sep 17 00:00:00 2001 From: mahtoid Date: Sun, 22 Mar 2026 15:20:17 +0000 Subject: [PATCH 6/8] Image Grids --- chat_exporter/construct/assets/__init__.py | 3 +- chat_exporter/construct/assets/attachment.py | 73 +++++++ chat_exporter/construct/message.py | 49 ++++- chat_exporter/ext/html_generator.py | 2 + chat_exporter/html/attachment/image_grid.html | 3 + .../html/attachment/image_grid_item.html | 3 + chat_exporter/html/base.html | 65 +++++++ tests/test_export.py | 182 ++++++++++++++++-- 8 files changed, 364 insertions(+), 16 deletions(-) create mode 100644 chat_exporter/html/attachment/image_grid.html create mode 100644 chat_exporter/html/attachment/image_grid_item.html diff --git a/chat_exporter/construct/assets/__init__.py b/chat_exporter/construct/assets/__init__.py index d107085..b2b611b 100644 --- a/chat_exporter/construct/assets/__init__.py +++ b/chat_exporter/construct/assets/__init__.py @@ -1,4 +1,4 @@ -from .attachment import Attachment +from .attachment import Attachment, AttachmentGrid from .component import Component from .embed import Embed from .reaction import Reaction @@ -7,5 +7,6 @@ Embed, Reaction, Attachment, + AttachmentGrid, Component, ) diff --git a/chat_exporter/construct/assets/attachment.py b/chat_exporter/construct/assets/attachment.py index 1b00ee6..e53e680 100644 --- a/chat_exporter/construct/assets/attachment.py +++ b/chat_exporter/construct/assets/attachment.py @@ -6,6 +6,8 @@ audio_attachment, fill_out, img_attachment, + img_grid, + 
img_grid_item, msg_attachment, video_attachment, ) @@ -44,6 +46,25 @@ async def build_attachment(self): if is_spoiler: self._mark_spoiler() + async def get_data(self): + is_spoiler = self._is_spoiler() + if self.attachments.content_type is not None: + if "image" in self.attachments.content_type: + return { + "type": "image", + "url": self.attachments.proxy_url, + "is_spoiler": is_spoiler, + "filename": self.attachments.filename, + } + elif "video" in self.attachments.content_type: + return { + "type": "video", + "url": self.attachments.proxy_url, + "is_spoiler": is_spoiler, + "filename": self.attachments.filename, + } + return None + async def image(self): self.attachments = await fill_out( self.guild, @@ -222,10 +243,62 @@ def _mark_spoiler(self): replacements = ( ("
    ", "
    "), + ("
    ", "
    "), + ("
    ", "
    "), ("class='chatlog__attachment'", "class='chatlog__attachment chatlog__attachment-spoiler'"), + ("class=\"chatlog__attachment\"", "class=\"chatlog__attachment chatlog__attachment-spoiler\""), + ("class=chatlog__attachment", "class=\"chatlog__attachment chatlog__attachment-spoiler\""), ) for target, replacement in replacements: if target in self.attachments: self.attachments = self.attachments.replace(target, replacement, 1) break + + +class AttachmentGrid: + def __init__(self, attachments, guild, splitIndex): + self.attachments = attachments + self.guild = guild + self.splitIndex = splitIndex + + async def flow(self): + grid_items_html = "" + for a in self.attachments: + item_content = await Attachment(a, self.guild).flow() + + grid_items_html += await fill_out( + self.guild, + img_grid_item, + [ + ("ITEM_CLASS", "", PARSE_MODE_NONE), + ("ITEM_CONTENT", item_content, PARSE_MODE_NONE), + ], + ) + + grid_class = self._get_grid_class(len(self.attachments), self.splitIndex) + + return await fill_out( + self.guild, + img_grid, + [ + ("GRID_CLASS", grid_class, PARSE_MODE_NONE), + ("GRID_ITEMS", grid_items_html, PARSE_MODE_NONE), + ], + ) + + @staticmethod + def _get_grid_class(count, splitIndex): + if count == 1: + return "chatlog__attachment-grid--1x1" + elif count == 2: + return "chatlog__attachment-grid--1x2" + elif count == 3: + if splitIndex == 0: + return "chatlog__attachment-grid--1x3" # mosaic + else: + return "chatlog__attachment-grid--3x3" + elif count <= 4: + return "chatlog__attachment-grid--2x2" + else: + return "chatlog__attachment-grid--3x3" diff --git a/chat_exporter/construct/message.py b/chat_exporter/construct/message.py index 41ecba3..83cee89 100644 --- a/chat_exporter/construct/message.py +++ b/chat_exporter/construct/message.py @@ -4,7 +4,7 @@ from pytz import timezone -from chat_exporter.construct.assets import Attachment, Component, Embed, Reaction +from chat_exporter.construct.assets import Attachment, AttachmentGrid, Component, Embed, 
Reaction from chat_exporter.construct.attachment_handler import AttachmentHandler from chat_exporter.ext.cache import cache from chat_exporter.ext.discord_import import discord @@ -359,6 +359,29 @@ async def build_sticker(self): ], ) + @staticmethod + def calculate_grid_splits(n): + if n <= 4: + return [n] + if n == 9: + return [9] + if n == 5: + return [2, 3] + if n == 6: + return [3, 3] + if n == 7: + return [3, 4] + if n == 8: + return [4, 4] + if n == 10: + return [1, 9] + + if n > 9: + res = MessageConstruct.calculate_grid_splits(n - 9) + res.append(9) + return res + return [n] + async def build_assets(self): processed_attachments = [] attachment_urls = set() @@ -380,8 +403,30 @@ async def build_assets(self): continue self.embeds += await Embed(e, self.guild, self.pytz_timezone, self.military_time).flow() + media_group = [] + + async def flush_media_group(group): + if not group: + return "" + html_output = "" + splits = self.calculate_grid_splits(len(group)) + start = 0 + for i, s in enumerate(splits): + html_output += await AttachmentGrid(group[start : start + s], self.guild, i).flow() + start += s + return html_output + for a in processed_attachments: - self.attachments += await Attachment(a, self.guild).flow() + if a.content_type and ("image" in a.content_type or "video" in a.content_type): + media_group.append(a) + else: + if media_group: + self.attachments += await flush_media_group(media_group) + media_group = [] + self.attachments += await Attachment(a, self.guild).flow() + + if media_group: + self.attachments += await flush_media_group(media_group) for snapshot in self.get_message_snapshots(): if hasattr(snapshot, "attachments"): diff --git a/chat_exporter/ext/html_generator.py b/chat_exporter/ext/html_generator.py index e2ea584..e95bed3 100644 --- a/chat_exporter/ext/html_generator.py +++ b/chat_exporter/ext/html_generator.py @@ -113,6 +113,8 @@ def read_file(filename): # ATTACHMENT img_attachment = read_file(dir_path + "/html/attachment/image.html") 
+img_grid = read_file(dir_path + "/html/attachment/image_grid.html") +img_grid_item = read_file(dir_path + "/html/attachment/image_grid_item.html") msg_attachment = read_file(dir_path + "/html/attachment/message.html") audio_attachment = read_file(dir_path + "/html/attachment/audio.html") video_attachment = read_file(dir_path + "/html/attachment/video.html") diff --git a/chat_exporter/html/attachment/image_grid.html b/chat_exporter/html/attachment/image_grid.html new file mode 100644 index 0000000..cf617d3 --- /dev/null +++ b/chat_exporter/html/attachment/image_grid.html @@ -0,0 +1,3 @@ +
    + {{GRID_ITEMS}} +
    diff --git a/chat_exporter/html/attachment/image_grid_item.html b/chat_exporter/html/attachment/image_grid_item.html new file mode 100644 index 0000000..a9986a3 --- /dev/null +++ b/chat_exporter/html/attachment/image_grid_item.html @@ -0,0 +1,3 @@ +
    + {{ITEM_CONTENT}} +
    diff --git a/chat_exporter/html/base.html b/chat_exporter/html/base.html index d731c63..cb33fb7 100644 --- a/chat_exporter/html/base.html +++ b/chat_exporter/html/base.html @@ -546,6 +546,71 @@ filter: blur(0); } + .chatlog__attachment-grid { + display: grid; + gap: 4px; + margin-top: 0.3em; + width: fit-content; + max-width: 550px; + } + + .chatlog__attachment-grid-item, + .chatlog__attachment, + .chatlog__attachment-spoiler, + .chatlog__attachment-spoiler a { + max-height: inherit; + } + + .chatlog__attachment-thumbnail { + display: block; + max-height: inherit; + width: auto; + } + + .chatlog__attachment-grid--1x1 { + grid-template-columns: 1fr; + max-height: 280px; + } + + .chatlog__attachment-grid--1x2 { + grid-template-columns: repeat(2, 1fr); + } + + .chatlog__attachment-grid--1x3 { + grid-template-columns: 2fr 1fr; + grid-template-rows: repeat(2, 1fr); + } + + .chatlog__attachment-grid--1x3 .chatlog__attachment-grid-item:first-child { + grid-row: span 2; + } + + .chatlog__attachment-grid--2x2 { + grid-template-columns: repeat(2, 1fr); + } + + .chatlog__attachment-grid--3x3 { + grid-template-columns: repeat(3, 1fr); + } + + .chatlog__attachment-grid-item { + position: relative; + overflow: hidden; + border-radius: 8px; + } + + .chatlog__attachment-grid-item .chatlog__attachment { + margin-top: 0; + } + + .chatlog__attachment-grid-item .chatlog__attachment-thumbnail { + max-width: 100%; + max-height: 100%; + object-fit: cover; + width: 100%; + height: 100%; + } + .chatlog__attachment-audio-container audio { padding-top: 0.5em; width: 100%; diff --git a/tests/test_export.py b/tests/test_export.py index c9fa0d9..2796193 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -391,14 +391,14 @@ def test_complex_embed_rendering(self): {"name": "Field 3", "value": "Value 3", "inline": False}, ], author_name="Author Name", - author_icon="https://example.com/author.png", - author_url="https://example.com/author", + 
author_icon="https://mahto.id/assets/me.png", + author_url="https://mahto.id/assets/me.png", footer_text="Footer Text", - footer_icon="https://example.com/footer.png", - image_url="https://example.com/image.png", - thumbnail_url="https://example.com/thumbnail.png", + footer_icon="https://mahto.id/assets/me.png", + image_url="https://mahto.id/assets/me.png", + thumbnail_url="https://mahto.id/assets/me.png", timestamp=datetime(2024, 1, 1, 12, 0, 0, tzinfo=pytz.utc), - url="https://example.com/embed", + url="https://mahto.id/assets/me.png", ) msg = _make_message("Message with complex embed", guild=guild) @@ -411,8 +411,8 @@ def test_complex_embed_rendering(self): self.assertIn("Value 3", html) self.assertIn("Author Name", html) self.assertIn("Footer Text", html) - self.assertIn("https://example.com/image.png", html) - self.assertIn("https://example.com/thumbnail.png", html) + self.assertIn("https://mahto.id/assets/me.png", html) + self.assertIn("https://mahto.id/assets/me.png", html) def test_message_with_multiple_embeds_and_attachments(self): """Test a message that has multiple embeds and multiple attachments.""" @@ -422,16 +422,16 @@ def test_message_with_multiple_embeds_and_attachments(self): embed2 = _make_embed(title="Embed 2", description="Description 2") att1 = MagicMock(spec=discord.Attachment) - att1.url = "https://example.com/file1.png" - att1.proxy_url = "https://example.com/file1.png" + att1.url = "https://mahto.id/assets/me.png" + att1.proxy_url = "https://mahto.id/assets/me.png" att1.filename = "file1.png" att1.size = 1048576 att1.content_type = "image/png" att1.is_spoiler.return_value = False att2 = MagicMock(spec=discord.Attachment) - att2.url = "https://example.com/file2.txt" - att2.proxy_url = "https://example.com/file2.txt" + att2.url = "https://mahto.id/assets/me.png" + att2.proxy_url = "https://mahto.id/assets/me.png" att2.filename = "file2.txt" att2.size = 1024 att2.content_type = "text/plain" @@ -445,5 +445,161 @@ def 
test_message_with_multiple_embeds_and_attachments(self): self.assertIn("Embed 1", html) self.assertIn("Embed 2", html) - self.assertIn("file1.png", html) + self.assertIn("https://mahto.id/assets/me.png", html) self.assertIn("file2.txt", html) + + def test_image_spoiler(self): + """An image marked as a spoiler should have the spoiler CSS class.""" + guild = _make_guild() + + att = MagicMock(spec=discord.Attachment) + att.url = "https://mahto.id/assets/me.png" + att.proxy_url = "https://mahto.id/assets/me.png" + att.filename = "SPOILER_file.png" + att.size = 100 + att.content_type = "image/png" + att.is_spoiler.return_value = True + + msg = _make_message("", guild=guild) + msg.attachments = [att] + + html = self._export([msg], "image_spoiler.html", guild=guild) + + self.assertIn("chatlog__attachment-spoiler", html) + self.assertIn("SPOILER", html) + + def test_image_grid_2(self): + """Two consecutive images should be rendered in a 1x2 grid.""" + guild = _make_guild() + + att1 = MagicMock(spec=discord.Attachment) + att1.url = "https://mahto.id/assets/me.png" + att1.proxy_url = "https://mahto.id/assets/me.png" + att1.filename = "img1.png" + att1.size = 100 + att1.content_type = "image/png" + att1.is_spoiler.return_value = False + + att2 = MagicMock(spec=discord.Attachment) + att2.url = "https://mahto.id/assets/me.png" + att2.proxy_url = "https://mahto.id/assets/me.png" + att2.filename = "img2.png" + att2.size = 100 + att2.content_type = "image/png" + att2.is_spoiler.return_value = False + + msg = _make_message("", guild=guild) + msg.attachments = [att1, att2] + + html = self._export([msg], "image_grid_2.html", guild=guild) + + self.assertIn("chatlog__attachment-grid--1x2", html) + self.assertIn("https://mahto.id/assets/me.png", html) + + def test_image_grid_3(self): + """Three consecutive images should be rendered in a 1x3 grid.""" + guild = _make_guild() + attachments = [] + for i in range(3): + att = MagicMock(spec=discord.Attachment) + att.url = 
"https://mahto.id/assets/me.png" + att.proxy_url = "https://mahto.id/assets/me.png" + att.filename = f"img{i}.png" + att.size = 100 + att.content_type = "image/png" + att.is_spoiler.return_value = False + attachments.append(att) + + msg = _make_message("", guild=guild) + msg.attachments = attachments + + html = self._export([msg], "image_grid_3.html", guild=guild) + + self.assertIn("chatlog__attachment-grid--1x3", html) + self.assertIn("https://mahto.id/assets/me.png", html) + + def test_image_grid_4(self): + """Four consecutive images should be rendered in a 2x2 grid.""" + guild = _make_guild() + attachments = [] + for i in range(4): + att = MagicMock(spec=discord.Attachment) + att.url = "https://mahto.id/assets/me.png" + att.proxy_url = "https://mahto.id/assets/me.png" + att.filename = f"img{i}.png" + att.size = 100 + att.content_type = "image/png" + att.is_spoiler.return_value = False + attachments.append(att) + + msg = _make_message("", guild=guild) + msg.attachments = attachments + + html = self._export([msg], "image_grid_4.html", guild=guild) + + self.assertIn("chatlog__attachment-grid--2x2", html) + self.assertIn("https://mahto.id/assets/me.png", html) + + def test_image_grid_5(self): + """Five consecutive images should be split into 1x2 and 1x3 grids.""" + guild = _make_guild() + attachments = [] + for i in range(5): + att = MagicMock(spec=discord.Attachment) + att.url = "https://mahto.id/assets/me.png" + att.proxy_url = "https://mahto.id/assets/me.png" + att.filename = f"img{i}.png" + att.size = 100 + att.content_type = "image/png" + att.is_spoiler.return_value = False + attachments.append(att) + + msg = _make_message("", guild=guild) + msg.attachments = attachments + + html = self._export([msg], "image_grid_5.html", guild=guild) + + self.assertIn("chatlog__attachment-grid--1x2", html) + self.assertIn("chatlog__attachment-grid--1x3", html) + self.assertIn("https://mahto.id/assets/me.png", html) + + def test_image_grid_10(self): + """Ten consecutive images 
should be split into 1x1 and 3x3 grids.""" + guild = _make_guild() + attachments = [] + for i in range(10): + att = MagicMock(spec=discord.Attachment) + att.url = "https://mahto.id/assets/me.png" + att.proxy_url = "https://mahto.id/assets/me.png" + att.filename = f"img{i}.png" + att.size = 100 + att.content_type = "image/png" + att.is_spoiler.return_value = False + attachments.append(att) + + msg = _make_message("", guild=guild) + msg.attachments = attachments + + html = self._export([msg], "image_grid_10.html", guild=guild) + + self.assertIn("chatlog__attachment-grid--1x1", html) + self.assertIn("chatlog__attachment-grid--3x3", html) + self.assertIn("https://mahto.id/assets/me.png", html) + + def test_grid_width_restriction(self): + """The grid should have a max-width restriction.""" + guild = _make_guild() + att = MagicMock(spec=discord.Attachment) + att.url = "https://mahto.id/assets/me.png" + att.proxy_url = "https://mahto.id/assets/me.png" + att.filename = "img1.png" + att.size = 100 + att.content_type = "image/png" + att.is_spoiler.return_value = False + + msg = _make_message("", guild=guild) + msg.attachments = [att, att] # 2 images for a grid + + html = self._export([msg], "grid_width.html", guild=guild) + + self.assertIn("max-width: 550px", html) From e3f9e8a8c397b465a1e12e4e5614585b23077673 Mon Sep 17 00:00:00 2001 From: mahtoid Date: Sun, 22 Mar 2026 15:26:05 +0000 Subject: [PATCH 7/8] Lint workflow to fail on error + lint solves --- .github/workflows/lint.yml | 2 +- chat_exporter/construct/message.py | 2 +- chat_exporter/ext/html_generator.py | 3 ++- tests/integration_test_discord.py | 5 +++-- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index bcfbe46..138c0e9 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -11,5 +11,5 @@ jobs: - uses: actions/checkout@v4 - uses: astral-sh/ruff-action@v1 with: - args: "check --select S,E,F,I --exit-zero" + args: "check 
--select S,E,F,I" diff --git a/chat_exporter/construct/message.py b/chat_exporter/construct/message.py index 83cee89..5238b36 100644 --- a/chat_exporter/construct/message.py +++ b/chat_exporter/construct/message.py @@ -21,10 +21,10 @@ img_attachment, message_body, message_content, + message_forwarded, message_interaction, message_pin, message_reference, - message_forwarded, message_reference_unknown, message_thread, message_thread_add, diff --git a/chat_exporter/ext/html_generator.py b/chat_exporter/ext/html_generator.py index e95bed3..e5887bb 100644 --- a/chat_exporter/ext/html_generator.py +++ b/chat_exporter/ext/html_generator.py @@ -3,7 +3,8 @@ import os import re -from chat_exporter.parse.markdown import ParseMarkdown, bot as mention_bot +from chat_exporter.parse.markdown import ParseMarkdown +from chat_exporter.parse.markdown import bot as mention_bot dir_path = os.path.abspath(os.path.join((os.path.dirname(os.path.realpath(__file__))), "..")) diff --git a/tests/integration_test_discord.py b/tests/integration_test_discord.py index 355103c..dfc094d 100644 --- a/tests/integration_test_discord.py +++ b/tests/integration_test_discord.py @@ -1,8 +1,9 @@ import os -import asyncio +import sys + import discord + import chat_exporter -import sys # Configuration from environment variables TOKEN = os.getenv("DISCORD_TOKEN") From 5fb7c2e71f7d2f8d834546af32d210d2a57d8e3a Mon Sep 17 00:00:00 2001 From: mahtoid Date: Wed, 25 Mar 2026 23:44:20 +0000 Subject: [PATCH 8/8] Various bug fixes --- README.md | 8 +- chat_exporter/construct/assets/attachment.py | 47 +- chat_exporter/construct/assets/component.py | 374 ++++----- chat_exporter/construct/assets/embed.py | 2 +- chat_exporter/construct/message.py | 23 +- chat_exporter/html/attachment/video.html | 2 +- chat_exporter/html/base.html | 830 ++++++++++--------- chat_exporter/parse/ast.py | 12 +- chat_exporter/parse/markdown.py | 16 +- tests/test_ast.py | 9 + tests/test_export.py | 24 +- 11 files changed, 700 insertions(+), 647 
deletions(-) diff --git a/README.md b/README.md index ef0a8c8..5f50821 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,7 @@ how to use the `AttachmentHandler`. One using the example of storing files on a an example of storing them on Discord *(the latter merely just being an example, this will still obviously run in to the expiration issue)*. -If you do not specify an attachment handler, chat-exporter will continue to use the (proxy) URLs for the assets. +If you do not specify an attachment handler, chat-exporter will continue to use the URLs for the assets.
    Concept @@ -266,14 +266,14 @@ class MyAttachmentHandler(AttachmentHandler): # now we can generate the asset url from the identifier asset_url = await self.cloud_client.get_share_url(asset_id, shared_with="everyone") - # and set the proxy url attribute of the attachment to the generated url - attachment.proxy_url = asset_url + # and set the url attribute of the attachment to the generated url + attachment.url = asset_url return attachment ``` Note -1. The `process_asset` method should return the attachment object with the proxy_url attribute set to the generated url. +1. The `process_asset` method should return the attachment object with the url attribute set to the generated url. 2. The `process_asset` method should be an async method, as it is likely that you have to do some async operations like fetching the content of the attachment or uploading it to the cloud. 3. You are free to add other methods in your class, and call them from `process_asset` if you need to do some diff --git a/chat_exporter/construct/assets/attachment.py b/chat_exporter/construct/assets/attachment.py index e53e680..8c64148 100644 --- a/chat_exporter/construct/assets/attachment.py +++ b/chat_exporter/construct/assets/attachment.py @@ -52,14 +52,14 @@ async def get_data(self): if "image" in self.attachments.content_type: return { "type": "image", - "url": self.attachments.proxy_url, + "url": self.attachments.url, "is_spoiler": is_spoiler, "filename": self.attachments.filename, } elif "video" in self.attachments.content_type: return { "type": "video", - "url": self.attachments.proxy_url, + "url": self.attachments.url, "is_spoiler": is_spoiler, "filename": self.attachments.filename, } @@ -70,14 +70,20 @@ async def image(self): self.guild, img_attachment, [ - ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE), - ("ATTACH_URL_THUMB", self.attachments.proxy_url, PARSE_MODE_NONE), + ("ATTACH_URL", self.attachments.url, PARSE_MODE_NONE), + ("ATTACH_URL_THUMB", self.attachments.url, 
PARSE_MODE_NONE), ], ) async def video(self): + width_str = f'width="{self.attachments.width}"' if getattr(self.attachments, "width", None) else "" + height_str = f'height="{self.attachments.height}"' if getattr(self.attachments, "height", None) else "" self.attachments = await fill_out( - self.guild, video_attachment, [("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE)] + self.guild, video_attachment, [ + ("ATTACH_URL", self.attachments.url, PARSE_MODE_NONE), + ("ATTACH_WIDTH", width_str, PARSE_MODE_NONE), + ("ATTACH_HEIGHT", height_str, PARSE_MODE_NONE), + ] ) async def audio(self): @@ -89,9 +95,9 @@ async def audio(self): audio_attachment, [ ("ATTACH_ICON", file_icon, PARSE_MODE_NONE), - ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE), + ("ATTACH_URL", self.attachments.url, PARSE_MODE_NONE), ("ATTACH_BYTES", str(file_size), PARSE_MODE_NONE), - ("ATTACH_AUDIO", self.attachments.proxy_url, PARSE_MODE_NONE), + ("ATTACH_AUDIO", self.attachments.url, PARSE_MODE_NONE), ("ATTACH_FILE", str(self.attachments.filename), PARSE_MODE_NONE), ], ) @@ -106,7 +112,7 @@ async def file(self): msg_attachment, [ ("ATTACH_ICON", file_icon, PARSE_MODE_NONE), - ("ATTACH_URL", self.attachments.proxy_url, PARSE_MODE_NONE), + ("ATTACH_URL", self.attachments.url, PARSE_MODE_NONE), ("ATTACH_BYTES", str(file_size), PARSE_MODE_NONE), ("ATTACH_FILE", str(self.attachments.filename), PARSE_MODE_NONE), ], @@ -126,7 +132,7 @@ async def get_file_icon(self) -> str: return self.resolve_file_icon( name=str(getattr(self.attachments, "filename", "") or ""), content_type=str(getattr(self.attachments, "content_type", "") or ""), - url=str(getattr(self.attachments, "proxy_url", "") or ""), + url=str(getattr(self.attachments, "url", "") or ""), ) @staticmethod @@ -257,10 +263,10 @@ def _mark_spoiler(self): class AttachmentGrid: - def __init__(self, attachments, guild, splitIndex): + def __init__(self, attachments, guild, attachmentCount): self.attachments = attachments self.guild = 
guild - self.splitIndex = splitIndex + self.attachmentCount = attachmentCount async def flow(self): grid_items_html = "" @@ -276,7 +282,7 @@ async def flow(self): ], ) - grid_class = self._get_grid_class(len(self.attachments), self.splitIndex) + grid_class = self._get_grid_class(len(self.attachments), self.attachmentCount) return await fill_out( self.guild, @@ -288,17 +294,18 @@ async def flow(self): ) @staticmethod - def _get_grid_class(count, splitIndex): - if count == 1: + def _get_grid_class(chunk_size, total_count): + if chunk_size == 1: + if total_count == 1: + return "chatlog__attachment-grid--single" return "chatlog__attachment-grid--1x1" - elif count == 2: + elif chunk_size == 2: return "chatlog__attachment-grid--1x2" - elif count == 3: - if splitIndex == 0: + elif chunk_size == 3: + if total_count == 3: return "chatlog__attachment-grid--1x3" # mosaic - else: - return "chatlog__attachment-grid--3x3" - elif count <= 4: + return "chatlog__attachment-grid--3x3" + elif chunk_size == 4: return "chatlog__attachment-grid--2x2" else: return "chatlog__attachment-grid--3x3" diff --git a/chat_exporter/construct/assets/component.py b/chat_exporter/construct/assets/component.py index c425281..753e02b 100644 --- a/chat_exporter/construct/assets/component.py +++ b/chat_exporter/construct/assets/component.py @@ -1,3 +1,4 @@ +import html from urllib.parse import urlparse from chat_exporter.construct.assets.attachment import Attachment @@ -23,6 +24,14 @@ fill_out, ) +modules_which_use_none = ["nextcord", "disnake"] + + +def _gather_checker(): + if discord.module not in modules_which_use_none and hasattr(discord.Embed, "Empty"): + return discord.Embed.Empty + return None + class Component: styles = { @@ -38,10 +47,24 @@ class Component: "link": "#4F545C", } + _type_map = { + 1: "build_action_row", + 2: "build_button", + 3: "build_menu", + 9: "build_section", + 10: "build_text_display", + 11: "build_thumbnail", + 12: "build_media_gallery", + 13: "build_file", + 14: 
"build_separator", + 17: "build_container", + } + components: str = "" menus: str = "" buttons: str = "" menu_div_id: int = 0 + check_against = None def __init__(self, component, guild, attachments=None): self.component = component @@ -58,90 +81,38 @@ def __init__(self, component, guild, attachments=None): self.components = "" self.menus = "" self.buttons = "" + self.check_against = _gather_checker() - @staticmethod - def _get_media_url(media): - """Return a best-effort URL string from a media/file object, dict, or raw string.""" - if not media: - return "" - if isinstance(media, str): - return media - if isinstance(media, dict): - return str(media.get("url", "")) - return str(getattr(media, "url", "")) - - @staticmethod - def _get_attr(obj, key, default=None): - if isinstance(obj, dict): - return obj.get(key, default) - return getattr(obj, key, default) + # --- Core Builders --- - @staticmethod - def _stringify_emoji(emoji_obj): - """Return a displayable emoji string from dict/emoji.""" - if not emoji_obj: - return "" - if isinstance(emoji_obj, dict): - emoji_id = emoji_obj.get("id") - emoji_name = emoji_obj.get("name") or "" - if emoji_id: - return f"<:{emoji_name}:{emoji_id}>" - return emoji_name - return str(emoji_obj) - - @staticmethod - def _file_display_name(url: str) -> str: - """Return a clean filename without query/fragment.""" - if not url: - return "" - if url.startswith("attachment://"): - return url.replace("attachment://", "") - - parsed = urlparse(url) - path_name = parsed.path.rsplit("/", 1)[-1] if parsed.path else url - return path_name or url - - def _find_related_attachment(self, media, file_name: str): - """Attempt to match a component media item to a real attachment for metadata.""" - if not self.attachments: - return None - - attachment_id = getattr(media, "attachment_id", None) if media else None - if attachment_id is not None: - for attachment in self.attachments: - if getattr(attachment, "id", None) == attachment_id: - return attachment - 
- for attachment in self.attachments: - if file_name and str(getattr(attachment, "filename", "")) == file_name: - return attachment - - media_url = self._get_media_url(media) - if media_url: - for attachment in self.attachments: - if getattr(attachment, "url", None) == media_url or getattr(attachment, "proxy_url", None) == media_url: - return attachment + async def flow(self): + # Try to handle the component directly + component_html = await self.build_component(self.component) + if component_html: + self.components += component_html + else: + # Fallback to legacy flow for action rows with children + children = getattr(self.component, "children", []) or getattr(self.component, "components", []) + for c in children: + child_html = await self.build_component(c) + if child_html: + self.buttons += child_html - return None + if self.menus: + self.components += f'
    {self.menus}
    ' - @staticmethod - def _get_file_extension(name: str) -> str: - if not name or "." not in name: - return "" - return name.rsplit(".", 1)[-1].lower() + if self.buttons: + self.components += f'
    {self.buttons}
    ' - def _get_file_icon(self, file_name: str, content_type: str = "", media_url: str = "") -> str: - """Return the most appropriate file icon for the given name or content type.""" - return Attachment.resolve_file_icon(file_name, content_type, media_url) + return self.components async def build_component(self, c): - # Check for component type attribute component_type = getattr(c, "type", None) # Handle legacy components (v1) if isinstance(c, discord.Button): return await self.build_button(c) - elif isinstance(c, discord.SelectMenu): + if isinstance(c, discord.SelectMenu): menu_html = await self.build_menu(c) Component.menu_div_id += 1 return menu_html @@ -151,40 +122,15 @@ async def build_component(self, c): return "" type_value = component_type.value if hasattr(component_type, "value") else component_type - - # ActionRow (type 1) - contains buttons/selects - if type_value == 1: - return await self.build_action_row(c) - # Button (type 2) - elif type_value == 2: - return await self.build_button(c) - # StringSelect (type 3) - elif type_value == 3: - menu_html = await self.build_menu(c) - Component.menu_div_id += 1 - return menu_html - # Section (type 9) - elif type_value == 9: - return await self.build_section(c) - # TextDisplay (type 10) - elif type_value == 10: - return await self.build_text_display(c) - # Thumbnail (type 11) - elif type_value == 11: - return await self.build_thumbnail(c) - # MediaGallery (type 12) - elif type_value == 12: - return await self.build_media_gallery(c) - # File (type 13) - elif type_value == 13: - return await self.build_file(c) - # Separator (type 14) - elif type_value == 14: - return await self.build_separator(c) - # Container (type 17) - elif type_value == 17: - return await self.build_container(c) - + + method_name = self._type_map.get(type_value) + if method_name: + method = getattr(self, method_name) + result = await method(c) + if type_value == 3: # StringSelect + Component.menu_div_id += 1 + return result + return "" async 
def build_action_row(self, c): @@ -216,7 +162,8 @@ async def build_button(self, c): target = "" icon = "" - label = str(self._get_attr(c, "label", "") or "") + label = self._get_attr(c, "label", "") or "" + label = html.escape(str(label)) if label else "" raw_style = self._get_attr(c, "style", None) style_key = "" if isinstance(raw_style, int): @@ -274,6 +221,9 @@ async def build_menu(self, c): if not disabled: content = await self.build_menu_options(options) + selected_label = html.escape(selected_label) if selected_label else "" + placeholder = html.escape(placeholder) if placeholder else "" + menu_html = await fill_out( self.guild, component_menu, @@ -298,6 +248,9 @@ async def build_menu_options(self, options): default_class = "dropdownContentSelected" if is_default else "" check_mark = "✓" if is_default else "" + label_escaped = html.escape(str(label)) if label else "" + description_escaped = html.escape(str(description)) if description else "" + if option_emoji: content.append( await fill_out( @@ -305,8 +258,8 @@ async def build_menu_options(self, options): component_menu_options_emoji, [ ("EMOJI", str(option_emoji), PARSE_MODE_EMOJI), - ("TITLE", str(label), PARSE_MODE_MARKDOWN), - ("DESCRIPTION", str(description) if description else "", PARSE_MODE_MARKDOWN), + ("TITLE", label_escaped, PARSE_MODE_MARKDOWN), + ("DESCRIPTION", description_escaped, PARSE_MODE_MARKDOWN), ("DEFAULT_CLASS", default_class, PARSE_MODE_NONE), ("CHECK", check_mark, PARSE_MODE_NONE), ], @@ -318,8 +271,8 @@ async def build_menu_options(self, options): self.guild, component_menu_options, [ - ("TITLE", str(label), PARSE_MODE_MARKDOWN), - ("DESCRIPTION", str(description) if description else "", PARSE_MODE_MARKDOWN), + ("TITLE", label_escaped, PARSE_MODE_MARKDOWN), + ("DESCRIPTION", description_escaped, PARSE_MODE_MARKDOWN), ("DEFAULT_CLASS", default_class, PARSE_MODE_NONE), ("CHECK", check_mark, PARSE_MODE_NONE), ], @@ -416,13 +369,14 @@ async def build_section(self, c): async def 
build_text_display(self, c): """Build a text display component""" - content = getattr(c, "content", "") + content = self._get_attr(c, "content", "") + content = html.escape(str(content)) if content else "" return await fill_out( self.guild, component_text_display, [ - ("CONTENT", str(content), PARSE_MODE_EMBED), + ("CONTENT", content, PARSE_MODE_EMBED), ], ) @@ -432,40 +386,21 @@ async def build_thumbnail(self, c): description = self._get_attr(c, "description", None) spoiler = bool(self._get_attr(c, "spoiler", False)) - url = self._get_media_url(media) - if not url: + props = self._build_media_properties(media, description, spoiler, "thumbnail") + if not props: return "" - file_name = self._file_display_name(url) - related_attachment = self._find_related_attachment(media, file_name) - if not description and related_attachment: - description = getattr(related_attachment, "description", None) - spoiler_class = "chatlog__component-spoiler" if spoiler else "" - description_text = description if description else "" - description_overlay = "" - spoiler_label = "" - title_text = description_text - alt_text = description_text or file_name - - if spoiler: - spoiler_label = '
    SPOILER
    ' - title_text = "Spoiler" - alt_text = "Spoiler" - description_overlay = "" - elif description: - description_overlay = f'
    {description}
    ' - return await fill_out( self.guild, component_thumbnail, [ - ("URL", str(url), PARSE_MODE_NONE), - ("TITLE", title_text, PARSE_MODE_MARKDOWN), - ("ALT", alt_text, PARSE_MODE_MARKDOWN), - ("DESCRIPTION", description_text, PARSE_MODE_MARKDOWN), - ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), - ("SPOILER_TAG", spoiler_label, PARSE_MODE_NONE), - ("DESCRIPTION_OVERLAY", description_overlay, PARSE_MODE_NONE), + ("URL", props["URL"], PARSE_MODE_NONE), + ("TITLE", props["TITLE"], PARSE_MODE_MARKDOWN), + ("ALT", props["ALT"], PARSE_MODE_MARKDOWN), + ("DESCRIPTION", props["DESCRIPTION"], PARSE_MODE_MARKDOWN), + ("SPOILER_CLASS", props["SPOILER_CLASS"], PARSE_MODE_NONE), + ("SPOILER_TAG", props["SPOILER_TAG"], PARSE_MODE_NONE), + ("DESCRIPTION_OVERLAY", props["DESCRIPTION_OVERLAY"], PARSE_MODE_NONE), ], ) @@ -504,40 +439,21 @@ async def build_media_gallery_item(self, item): description = self._get_attr(item, "description", None) spoiler = bool(self._get_attr(item, "spoiler", False)) - url = self._get_media_url(media) - if not url: + props = self._build_media_properties(media, description, spoiler, "media") + if not props: return "" - file_name = self._file_display_name(url) - related_attachment = self._find_related_attachment(media, file_name) - if not description and related_attachment: - description = getattr(related_attachment, "description", None) - - spoiler_class = "chatlog__component-spoiler" if spoiler else "" - description_text = description if description else "" - description_overlay = "" - spoiler_label = "" - title_text = description_text - alt_text = description_text or file_name - - if spoiler: - spoiler_label = '
    SPOILER
    ' - title_text = "Spoiler" - alt_text = "Spoiler" - elif description: - description_overlay = f'
    {description}
    ' - return await fill_out( self.guild, component_media_gallery_item, [ - ("URL", str(url), PARSE_MODE_NONE), - ("TITLE", title_text, PARSE_MODE_MARKDOWN), - ("ALT", alt_text, PARSE_MODE_MARKDOWN), - ("DESCRIPTION", description_text, PARSE_MODE_MARKDOWN), - ("SPOILER_CLASS", spoiler_class, PARSE_MODE_NONE), - ("SPOILER_TAG", spoiler_label, PARSE_MODE_NONE), - ("DESCRIPTION_OVERLAY", description_overlay, PARSE_MODE_NONE), + ("URL", props["URL"], PARSE_MODE_NONE), + ("TITLE", props["TITLE"], PARSE_MODE_MARKDOWN), + ("ALT", props["ALT"], PARSE_MODE_MARKDOWN), + ("DESCRIPTION", props["DESCRIPTION"], PARSE_MODE_MARKDOWN), + ("SPOILER_CLASS", props["SPOILER_CLASS"], PARSE_MODE_NONE), + ("SPOILER_TAG", props["SPOILER_TAG"], PARSE_MODE_NONE), + ("DESCRIPTION_OVERLAY", props["DESCRIPTION_OVERLAY"], PARSE_MODE_NONE), ], ) @@ -605,23 +521,111 @@ async def build_file(self, c): ], ) - async def flow(self): - # Try to handle the component directly - component_html = await self.build_component(self.component) - if component_html: - self.components += component_html + # --- Utility Helpers --- + + def _get_attr(self, obj, key, default=None): + if isinstance(obj, dict): + val = obj.get(key, default) else: - # Fallback to legacy flow for action rows with children - children = getattr(self.component, "children", []) or getattr(self.component, "components", []) - for c in children: - child_html = await self.build_component(c) - if child_html: - self.buttons += child_html + val = getattr(obj, key, default) + return val if val != self.check_against else default - if self.menus: - self.components += f'
    {self.menus}
    ' + def _build_media_properties(self, media, description, spoiler, css_class_prefix="thumbnail"): + url = self._get_media_url(media) + if not url: + return None + + file_name = self._file_display_name(url) + related_attachment = self._find_related_attachment(media, file_name) + if not description and related_attachment: + description = getattr(related_attachment, "description", None) - if self.buttons: - self.components += f'
    {self.buttons}
    ' + spoiler_class = "chatlog__component-spoiler" if spoiler else "" + spoiler_label = '
    SPOILER
    ' if spoiler else "" + + title_text = "Spoiler" if spoiler else description + alt_text = "Spoiler" if spoiler else (description or file_name) + description_text = description + + description_overlay = "" + if not spoiler and description: + description_overlay = f'
    {{html.escape(str(description))}}
    ' + + return { + "URL": str(url), + "TITLE": html.escape(str(title_text)) if title_text else "", + "ALT": html.escape(str(alt_text)) if alt_text else "", + "DESCRIPTION": html.escape(str(description_text)) if description_text else "", + "SPOILER_CLASS": spoiler_class, + "SPOILER_TAG": spoiler_label, + "DESCRIPTION_OVERLAY": description_overlay, + } - return self.components + @staticmethod + def _get_media_url(media): + """Return a best-effort URL string from a media/file object, dict, or raw string.""" + if not media: + return "" + if isinstance(media, str): + return media + if isinstance(media, dict): + return str(media.get("url", "")) + return str(getattr(media, "url", "")) + + @staticmethod + def _stringify_emoji(emoji_obj): + """Return a displayable emoji string from dict/emoji.""" + if not emoji_obj: + return "" + if isinstance(emoji_obj, dict): + emoji_id = emoji_obj.get("id") + emoji_name = emoji_obj.get("name") or "" + if emoji_id: + return f"<:{emoji_name}:{emoji_id}>" + return emoji_name + return str(emoji_obj) + + @staticmethod + def _file_display_name(url: str) -> str: + """Return a clean filename without query/fragment.""" + if not url: + return "" + if url.startswith("attachment://"): + return url.replace("attachment://", "") + + parsed = urlparse(url) + path_name = parsed.path.rsplit("/", 1)[-1] if parsed.path else url + return path_name or url + + def _find_related_attachment(self, media, file_name: str): + """Attempt to match a component media item to a real attachment for metadata.""" + if not self.attachments: + return None + + attachment_id = getattr(media, "attachment_id", None) if media else None + if attachment_id is not None: + for attachment in self.attachments: + if getattr(attachment, "id", None) == attachment_id: + return attachment + + for attachment in self.attachments: + if file_name and str(getattr(attachment, "filename", "")) == file_name: + return attachment + + media_url = self._get_media_url(media) + if media_url: + for 
attachment in self.attachments: + if getattr(attachment, "url", None) == media_url or getattr(attachment, "proxy_url", None) == media_url: + return attachment + + return None + + @staticmethod + def _get_file_extension(name: str) -> str: + if not name or "." not in name: + return "" + return name.rsplit(".", 1)[-1].lower() + + def _get_file_icon(self, file_name: str, content_type: str = "", media_url: str = "") -> str: + """Return the most appropriate file icon for the given name or content type.""" + return Attachment.resolve_file_icon(file_name, content_type, media_url) diff --git a/chat_exporter/construct/assets/embed.py b/chat_exporter/construct/assets/embed.py index 45b3d1b..838d7f6 100644 --- a/chat_exporter/construct/assets/embed.py +++ b/chat_exporter/construct/assets/embed.py @@ -210,7 +210,7 @@ async def build_author(self): async def build_image(self): self.image = ( - await fill_out(self.guild, embed_image, [("EMBED_IMAGE", str(self.embed.image.proxy_url), PARSE_MODE_NONE)]) + await fill_out(self.guild, embed_image, [("EMBED_IMAGE", str(self.embed.image.url), PARSE_MODE_NONE)]) if self.embed.image and self.embed.image.url != self.check_against else "" ) diff --git a/chat_exporter/construct/message.py b/chat_exporter/construct/message.py index 5238b36..3e37c4f 100644 --- a/chat_exporter/construct/message.py +++ b/chat_exporter/construct/message.py @@ -70,6 +70,7 @@ def __init__( attachment_handler: Optional[AttachmentHandler], ): self.message = message + self.rendered_content = "" self.previous_message = previous_message self.pytz_timezone = pytz_timezone self.military_time = military_time @@ -197,7 +198,7 @@ async def build_meta_data(self): async def build_content(self): if not self.message.content and not self.get_message_snapshots(): - self.message.content = "" + self.rendered_content = "" return if self.message_edited_at: @@ -213,7 +214,7 @@ async def build_content(self): combined = html.escape(combined or "") - self.message.content = await fill_out( 
+ self.rendered_content = await fill_out( self.guild, message_content, [ @@ -350,7 +351,7 @@ async def build_sticker(self): f"https://cdn.jsdelivr.net/gh/mahtoid/DiscordUtils@master/stickers/{sticker.pack_id}/{sticker.id}.gif" ) - self.message.content = await fill_out( + self.rendered_content = await fill_out( self.guild, img_attachment, [ @@ -370,9 +371,9 @@ def calculate_grid_splits(n): if n == 6: return [3, 3] if n == 7: - return [3, 4] + return [1, 3, 3] if n == 8: - return [4, 4] + return [2, 3, 3] if n == 10: return [1, 9] @@ -411,8 +412,8 @@ async def flush_media_group(group): html_output = "" splits = self.calculate_grid_splits(len(group)) start = 0 - for i, s in enumerate(splits): - html_output += await AttachmentGrid(group[start : start + s], self.guild, i).flow() + for s in splits: + html_output += await AttachmentGrid(group[start : start + s], self.guild, len(group)).flow() start += s return html_output @@ -455,8 +456,8 @@ async def wrap_forwarded(self): if not self.forwarded: return - self.message.content = ( - f'
    {message_forwarded}{self.message.content}' + self.rendered_content = ( + f'
    {message_forwarded}{self.rendered_content}' f"{self.attachments}{self.forwarded_embeds}{self.components}
    " ) @@ -475,7 +476,7 @@ async def build_message_template(self): message_body, [ ("MESSAGE_ID", str(self.message.id)), - ("MESSAGE_CONTENT", self.message.content, PARSE_MODE_NONE), + ("MESSAGE_CONTENT", self.rendered_content, PARSE_MODE_NONE), ("EMBEDS", self.embeds, PARSE_MODE_NONE), ("ATTACHMENTS", self.attachments, PARSE_MODE_NONE), ("COMPONENTS", self.components, PARSE_MODE_NONE), @@ -549,7 +550,7 @@ async def generate_message_divider(self, channel_audit=False): ("TIMESTAMP", str(self.message_created_at)), ("DEFAULT_TIMESTAMP", str(default_timestamp), PARSE_MODE_NONE), ("MESSAGE_ID", str(self.message.id)), - ("MESSAGE_CONTENT", self.message.content, PARSE_MODE_NONE), + ("MESSAGE_CONTENT", self.rendered_content, PARSE_MODE_NONE), ("EMBEDS", self.embeds, PARSE_MODE_NONE), ("ATTACHMENTS", self.attachments, PARSE_MODE_NONE), ("COMPONENTS", self.components, PARSE_MODE_NONE), diff --git a/chat_exporter/html/attachment/video.html b/chat_exporter/html/attachment/video.html index 700915f..7d323af 100644 --- a/chat_exporter/html/attachment/video.html +++ b/chat_exporter/html/attachment/video.html @@ -1,3 +1,3 @@
    - +
    \ No newline at end of file diff --git a/chat_exporter/html/base.html b/chat_exporter/html/base.html index cb33fb7..59d5c96 100644 --- a/chat_exporter/html/base.html +++ b/chat_exporter/html/base.html @@ -5,6 +5,7 @@ + {{SERVER_NAME_SAFE}} - {{CHANNEL_NAME_SAFE}} @@ -13,7 +14,7 @@ + Generated on: {{DATE_TIME}}" /> @@ -22,7 +23,7 @@ + Generated on: {{DATE_TIME}}" /> @@ -30,38 +31,38 @@ + Generated on: {{DATE_TIME}}" /> - + @@ -2004,315 +2035,300 @@ block: 'center', }); - setTimeout(function() { + setTimeout(function () { element.classList.remove('chatlog__message-container--highlighted'); }, 1500); } } - - function showSpoiler(event, element) { - if (element && element.classList.contains('spoiler--hidden')) { - event.preventDefault(); - element.classList.remove('spoiler--hidden'); + < !--Spoiler(|| Spoiler ||) --> + function showSpoiler(event, element) { + if (element && element.classList.contains('spoiler--hidden')) { + event.preventDefault(); + element.classList.remove('spoiler--hidden'); + } } - } - - function showDropdown(dropdownID) { - const button = document.getElementById("dropdownButton" + dropdownID); - const menuContent = document.getElementById("dropdownMenuContent" + dropdownID); - const menu = document.getElementById("dropdownMenu" + dropdownID); - const isOpen = button.getAttribute("aria-expanded") === "true"; - button.setAttribute("aria-expanded", isOpen ? 
"false" : "true"); - button.classList.toggle("chatlog__component-dropdown-border"); - if (menuContent) { - menuContent.classList.toggle("chatlog__component-dropdown-show"); - } - if (menu) { - menu.classList.toggle("chatlog__component-dropdown-show"); - } - } - - - document.addEventListener('DOMContentLoaded', () => { - const spoilerTargets = document.querySelectorAll( - '.chatlog__component-thumbnail.chatlog__component-spoiler, ' + - '.chatlog__component-media-item.chatlog__component-spoiler, ' + - '.chatlog__component-container.chatlog__component-spoiler, ' + - '.chatlog__component-file.chatlog__component-spoiler' - ); - spoilerTargets.forEach((el) => { - el.addEventListener('click', (e) => { - if (!el.classList.contains('chatlog__component-spoiler')) return; - e.preventDefault(); - el.classList.remove('chatlog__component-spoiler'); - const label = el.querySelector('.chatlog__component-spoiler-label'); - if (label) { - label.remove(); - } - }); - }); - }); + < !--Menu Dropdown(Selectmenu)-- > + function showDropdown(dropdownID) { + const button = document.getElementById("dropdownButton" + dropdownID); + const menuContent = document.getElementById("dropdownMenuContent" + dropdownID); + const menu = document.getElementById("dropdownMenu" + dropdownID); + const isOpen = button.getAttribute("aria-expanded") === "true"; + button.setAttribute("aria-expanded", isOpen ? 
"false" : "true"); + button.classList.toggle("chatlog__component-dropdown-border"); + if (menuContent) { + menuContent.classList.toggle("chatlog__component-dropdown-show"); + } + if (menu) { + menu.classList.toggle("chatlog__component-dropdown-show"); + } + } - - document.addEventListener('DOMContentLoaded', () => { - document.querySelectorAll('.pre--multiline').forEach((block) => { - hljs.highlightBlock(block); + < !--Component Spoiler(click to reveal thumbnails / media)-- > + document.addEventListener('DOMContentLoaded', () => { + const spoilerTargets = document.querySelectorAll( + '.chatlog__component-thumbnail.chatlog__component-spoiler, ' + + '.chatlog__component-media-item.chatlog__component-spoiler, ' + + '.chatlog__component-container.chatlog__component-spoiler, ' + + '.chatlog__component-file.chatlog__component-spoiler' + ); + spoilerTargets.forEach((el) => { + el.addEventListener('click', (e) => { + if (!el.classList.contains('chatlog__component-spoiler')) return; + e.preventDefault(); + el.classList.remove('chatlog__component-spoiler'); + const label = el.querySelector('.chatlog__component-spoiler-label'); + if (label) { + label.remove(); + } + }); + }); + }); + + < !--Code Block Markdown(```lang```)-- > + document.addEventListener('DOMContentLoaded', () => { + document.querySelectorAll('.pre--multiline').forEach((block) => { + hljs.highlightBlock(block); + }); }); - }); + -
    - - {{CHANNEL_NAME}} +
    + + {{CHANNEL_NAME}} - {{CHANNEL_TOPIC}} + {{CHANNEL_TOPIC}} -
    - Summary +
    + Summary +
    -
    -
    -
    - -
    +
    +
    + +
    -
    - Welcome to #{{CHANNEL_NAME}}! - {{SUBJECT}} -
    +
    + Welcome to #{{CHANNEL_NAME}}! + {{SUBJECT}} +
    -
    - {{MESSAGES}} +
    + {{MESSAGES}} +
    -
    - - -
    -
    Copy Message ID
    -
    + -
    -
    - Avatar +
    +
    Copy Message ID
    -
    -
    -
    {{SERVER_NAME}}
    -
    -
    -
    Guild ID
    -
    {{GUILD_ID}}
    -
    -
    -
    Channel ID
    -
    {{CHANNEL_ID}}
    -
    -
    -
    Channel Creation Date
    -
    {{CHANNEL_CREATED_AT}}
    -
    -
    -
    Total Message Count
    -
    {{MESSAGE_COUNT}}
    + +
    +
    + Avatar
    -
    -
    Total Message Participants
    -
    {{MESSAGE_PARTICIPANTS}}
    +
    +
    +
    {{SERVER_NAME}}
    +
    +
    +
    Guild ID
    +
    {{GUILD_ID}}
    +
    +
    +
    Channel ID
    +
    {{CHANNEL_ID}}
    +
    +
    +
    Channel Creation Date
    +
    {{CHANNEL_CREATED_AT}}
    +
    +
    +
    Total Message Count
    +
    {{MESSAGE_COUNT}}
    +
    +
    +
    Total Message Participants
    +
    {{MESSAGE_PARTICIPANTS}}
    +
    + {{SD}}
    - {{SD}}
    -
    -{{META_DATA}} - - + + { { FANCY_TIME } } + + \ No newline at end of file diff --git a/chat_exporter/parse/ast.py b/chat_exporter/parse/ast.py index 981dcf2..8148221 100644 --- a/chat_exporter/parse/ast.py +++ b/chat_exporter/parse/ast.py @@ -491,13 +491,15 @@ def _parse_inline(self, text: str) -> List[Node]: continue # Blockquote (>>>) - if (i == 0 or text[i - 1] == "\n") and text[i : i + 13] == ">>> ": - nodes.append(QuoteNode(self._parse_inline(text[i + 13 :]))) - break + if (i == 0 or text[i - 1] == "\n") and (text[i : i + 13] == ">>> " or text[i : i + 12] == ">>>"): + prefix_len = 13 if text[i : i + 13] == ">>> " else 12 + if text[i + prefix_len : i + prefix_len + 4] != ">": + nodes.append(QuoteNode(self._parse_inline(text[i + prefix_len :]))) + break # Single line quote (>) - if (i == 0 or text[i - 1] == "\n") and (text[i : i + 5] == "> " or text[i : i + 4] == ">"): - prefix_len = 5 if text[i : i + 5] == "> " else 4 + if (i == 0 or text[i - 1] == "\n") and text[i : i + 5] == "> ": + prefix_len = 5 endtag = text.find("\n", i + prefix_len) if endtag == -1: endtag = n diff --git a/chat_exporter/parse/markdown.py b/chat_exporter/parse/markdown.py index e81b449..9c1aa4f 100644 --- a/chat_exporter/parse/markdown.py +++ b/chat_exporter/parse/markdown.py @@ -42,7 +42,11 @@ def reverse_code_block_markdown(self): self.content = self.content.replace(f"{{{{CODE_BLOCK_{i}}}}}", block) async def standard_message_flow(self): - return await self.standard_embed_flow() + ast = AstParser() + nodes = ast.parse(self.content) + self.content = "".join(n.render(self.guild, self.bot) for n in nodes) + await self.parse_emoji() + return self.content async def link_embed_flow(self): ast = AstParser() @@ -81,6 +85,13 @@ async def parse_emoji(self): [r"", ''], ) + shield_blocks = [] + def repl(match): + shield_blocks.append(match.group(0)) + return f"{{{{SHIELD_{len(shield_blocks) - 1}}}}}" + self.content = re.sub(r'
    .*?', repl, self.content, flags=re.DOTALL) + self.content = await convert_emoji([word for word in self.content]) for p, r in holder: @@ -92,3 +103,6 @@ def repl(match): return repl self.content = re.sub(p, make_repl(r), self.content) + + for i, block in enumerate(shield_blocks): + self.content = self.content.replace(f"{{{{SHIELD_{i}}}}}", block) diff --git a/tests/test_ast.py b/tests/test_ast.py index ef2073d..9799f67 100644 --- a/tests/test_ast.py +++ b/tests/test_ast.py @@ -53,6 +53,15 @@ def test_multiline_blockquote(self): '
    Multiline
    Quote
    Block
    ', ) + def test_invalid_quotes_rendered_as_text(self): + text = ">> Quote 1\n>>>>>>>>> Quote 2" + nodes = self.parser.parse(text) + out = "".join(n.render() for n in nodes) + self.assertEqual( + out, + ">> Quote 1
    >>>>>>>>> Quote 2", + ) + def test_heading_newline_stripping(self): text = "# Heading\n\n\n\nTest" nodes = self.parser.parse(text) diff --git a/tests/test_export.py b/tests/test_export.py index 2796193..67b12d5 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -129,11 +129,11 @@ def _make_embed( embed.image = MagicMock() embed.image.url = image_url - embed.image.proxy_url = image_url + embed.image.url = image_url embed.thumbnail = MagicMock() embed.thumbnail.url = thumbnail_url - embed.thumbnail.proxy_url = thumbnail_url + embed.thumbnail.url = thumbnail_url embed.timestamp = timestamp embed.url = url @@ -423,7 +423,7 @@ def test_message_with_multiple_embeds_and_attachments(self): att1 = MagicMock(spec=discord.Attachment) att1.url = "https://mahto.id/assets/me.png" - att1.proxy_url = "https://mahto.id/assets/me.png" + att1.url = "https://mahto.id/assets/me.png" att1.filename = "file1.png" att1.size = 1048576 att1.content_type = "image/png" @@ -431,7 +431,7 @@ def test_message_with_multiple_embeds_and_attachments(self): att2 = MagicMock(spec=discord.Attachment) att2.url = "https://mahto.id/assets/me.png" - att2.proxy_url = "https://mahto.id/assets/me.png" + att2.url = "https://mahto.id/assets/me.png" att2.filename = "file2.txt" att2.size = 1024 att2.content_type = "text/plain" @@ -454,7 +454,7 @@ def test_image_spoiler(self): att = MagicMock(spec=discord.Attachment) att.url = "https://mahto.id/assets/me.png" - att.proxy_url = "https://mahto.id/assets/me.png" + att.url = "https://mahto.id/assets/me.png" att.filename = "SPOILER_file.png" att.size = 100 att.content_type = "image/png" @@ -474,7 +474,7 @@ def test_image_grid_2(self): att1 = MagicMock(spec=discord.Attachment) att1.url = "https://mahto.id/assets/me.png" - att1.proxy_url = "https://mahto.id/assets/me.png" + att1.url = "https://mahto.id/assets/me.png" att1.filename = "img1.png" att1.size = 100 att1.content_type = "image/png" @@ -482,7 +482,7 @@ def test_image_grid_2(self): att2 = 
MagicMock(spec=discord.Attachment) att2.url = "https://mahto.id/assets/me.png" - att2.proxy_url = "https://mahto.id/assets/me.png" + att2.url = "https://mahto.id/assets/me.png" att2.filename = "img2.png" att2.size = 100 att2.content_type = "image/png" @@ -503,7 +503,7 @@ def test_image_grid_3(self): for i in range(3): att = MagicMock(spec=discord.Attachment) att.url = "https://mahto.id/assets/me.png" - att.proxy_url = "https://mahto.id/assets/me.png" + att.url = "https://mahto.id/assets/me.png" att.filename = f"img{i}.png" att.size = 100 att.content_type = "image/png" @@ -525,7 +525,7 @@ def test_image_grid_4(self): for i in range(4): att = MagicMock(spec=discord.Attachment) att.url = "https://mahto.id/assets/me.png" - att.proxy_url = "https://mahto.id/assets/me.png" + att.url = "https://mahto.id/assets/me.png" att.filename = f"img{i}.png" att.size = 100 att.content_type = "image/png" @@ -547,7 +547,7 @@ def test_image_grid_5(self): for i in range(5): att = MagicMock(spec=discord.Attachment) att.url = "https://mahto.id/assets/me.png" - att.proxy_url = "https://mahto.id/assets/me.png" + att.url = "https://mahto.id/assets/me.png" att.filename = f"img{i}.png" att.size = 100 att.content_type = "image/png" @@ -570,7 +570,7 @@ def test_image_grid_10(self): for i in range(10): att = MagicMock(spec=discord.Attachment) att.url = "https://mahto.id/assets/me.png" - att.proxy_url = "https://mahto.id/assets/me.png" + att.url = "https://mahto.id/assets/me.png" att.filename = f"img{i}.png" att.size = 100 att.content_type = "image/png" @@ -591,7 +591,7 @@ def test_grid_width_restriction(self): guild = _make_guild() att = MagicMock(spec=discord.Attachment) att.url = "https://mahto.id/assets/me.png" - att.proxy_url = "https://mahto.id/assets/me.png" + att.url = "https://mahto.id/assets/me.png" att.filename = "img1.png" att.size = 100 att.content_type = "image/png"