diff --git a/libraries/matrixui/src/main/kotlin/io/element/android/libraries/matrix/ui/messages/HtmlTableToText.kt b/libraries/matrixui/src/main/kotlin/io/element/android/libraries/matrix/ui/messages/HtmlTableToText.kt new file mode 100644 index 00000000000..56dd551e76a --- /dev/null +++ b/libraries/matrixui/src/main/kotlin/io/element/android/libraries/matrix/ui/messages/HtmlTableToText.kt @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2025 Element Creations Ltd. + * + * SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial. + * Please see LICENSE files in the repository root for full details. + */ + +package io.element.android.libraries.matrix.ui.messages + +import org.jsoup.nodes.Document +import org.jsoup.nodes.Element + +/** + * Finds all `<table>` elements in the document and replaces them in-place + * with `<pre><code>` blocks containing a pipe-based text representation.
+ */
+fun Document.convertTablesToText() {
+    // Snapshot the matches: the tree is mutated inside the loop, so iterate over a copy.
+    val tables = getElementsByTag("table").toList()
+    for (table in tables) {
+        val rows = extractRows(table)
+        val maxCols = rows.maxOfOrNull { it.size } ?: 0
+        if (maxCols == 0) {
+            // Either there are no rows at all, or no row has a single cell. There is nothing
+            // to render, so drop the table instead of emitting a blank pre/code block.
+            table.remove()
+            continue
+        }
+
+        // Pad rows with fewer cells so that every row has exactly maxCols columns.
+        val normalizedRows = rows.map { row ->
+            row + List(maxCols - row.size) { "" }
+        }
+
+        val headerRowCount = detectHeaderRowCount(table)
+
+        // A column is as wide as its longest cell, and never narrower than one character.
+        val colWidths = IntArray(maxCols) { col ->
+            normalizedRows.maxOf { it[col].length }.coerceAtLeast(1)
+        }
+
+        val lines = buildList {
+            for ((i, row) in normalizedRows.withIndex()) {
+                add(formatRow(row, colWidths))
+                // Separator goes right after the last header row. Since i >= 0, this
+                // comparison can only succeed when headerRowCount is at least 1.
+                if (i == headerRowCount - 1) {
+                    add(formatSeparator(colWidths))
+                }
+            }
+        }
+
+        val text = lines.joinToString("\n")
+        val pre = Element("pre")
+        val code = Element("code")
+        code.appendText(text)
+        pre.appendChild(code)
+        table.replaceWith(pre)
+    }
+}
+
+/**
+ * Collects the cell texts of every row of [table], one inner list per `tr` element.
+ *
+ * Rows found inside the first `thead` come first, so that the header row count derived from
+ * that same `thead` lines up with the start of the returned list. All remaining `tr` elements
+ * follow in document order, except rows whose parent is a `tfoot`, which are placed last.
+ * Rows in additional `tbody` sections or placed directly under the table are included too.
+ */
+private fun extractRows(table: Element): List<List<String>> {
+    val thead = table.getElementsByTag("thead").first()
+    val headerRows: List<Element> = thead?.getElementsByTag("tr").orEmpty()
+
+    // Every other row of the table; identity comparison avoids relying on Element.equals().
+    val (footerRows, bodyRows) = table.getElementsByTag("tr")
+        .filter { tr -> thead == null || tr.parents().none { it === thead } }
+        .partition { tr -> tr.parent()?.tagName() == "tfoot" }
+
+    return (headerRows + bodyRows + footerRows).map { tr -> extractCells(tr) }
+}
+
+/**
+ * Returns the trimmed text of each direct `th`/`td` child of [tr], in child order.
+ * Children with any other tag are ignored.
+ */
+private fun extractCells(tr: Element): List<String> =
+    tr.children()
+        .filter { it.tagName() == "th" || it.tagName() == "td" }
+        .map { it.text().trim() }
+
+/**
+ * Detects the number of header rows.
+ * If `
` exists, its row count is used. + * Otherwise, if the first row contains only ` contains only
` elements, it's treated as a header. + */ +private fun detectHeaderRowCount(table: Element): Int { + val thead = table.getElementsByTag("thead").first() + if (thead != null) { + return thead.getElementsByTag("tr").size + } + + // Check if the first
elements + val firstTr = table.getElementsByTag("tr").firstOrNull() ?: return 0 + val cells = firstTr.children().filter { it.tagName() == "th" || it.tagName() == "td" } + return if (cells.isNotEmpty() && cells.all { it.tagName() == "th" }) 1 else 0 +} + +private fun formatRow(cells: List, colWidths: IntArray): String { + return cells.mapIndexed { i, cell -> + cell.padEnd(colWidths[i]) + }.joinToString(" | ") +} + +private fun formatSeparator(colWidths: IntArray): String { + return colWidths.joinToString("-+-") { "-".repeat(it) } +} diff --git a/libraries/matrixui/src/main/kotlin/io/element/android/libraries/matrix/ui/messages/ToHtmlDocument.kt b/libraries/matrixui/src/main/kotlin/io/element/android/libraries/matrix/ui/messages/ToHtmlDocument.kt index ee9de516817..c3f10e2824c 100644 --- a/libraries/matrixui/src/main/kotlin/io/element/android/libraries/matrix/ui/messages/ToHtmlDocument.kt +++ b/libraries/matrixui/src/main/kotlin/io/element/android/libraries/matrix/ui/messages/ToHtmlDocument.kt @@ -13,6 +13,7 @@ import io.element.android.libraries.matrix.api.permalink.PermalinkParser import io.element.android.libraries.matrix.api.timeline.item.event.FormattedBody import io.element.android.libraries.matrix.api.timeline.item.event.MessageFormat import io.element.android.wysiwyg.utils.HtmlToDomParser +import org.jsoup.Jsoup import org.jsoup.nodes.Document /** @@ -33,11 +34,12 @@ fun FormattedBody.toHtmlDocument( // We don't trim the start in case it's used as indentation. 
?.trimEnd() ?.let { formattedBody -> - val dom = if (prefix != null) { - HtmlToDomParser.document("$prefix $formattedBody") - } else { - HtmlToDomParser.document(formattedBody) - } + val htmlToProcess = if (prefix != null) "$prefix $formattedBody" else formattedBody + + // Convert tables to pre/code blocks before the wysiwyg safelist strips them + val processedHtml = convertTablesInHtml(htmlToProcess) + + val dom = HtmlToDomParser.document(processedHtml) // Prepend `@` to mentions fixMentions(dom, permalinkParser) @@ -46,6 +48,13 @@ fun FormattedBody.toHtmlDocument( } } +private fun convertTablesInHtml(html: String): String { + if ("
AB
CD
" + ) + doc.convertTablesToText() + + val code = doc.selectFirst("pre > code") + assertThat(code).isNotNull() + assertThat(code!!.wholeText()).isEqualTo( + "A | B\n" + + "C | D" + ) + } + + @Test + fun `table with thead header`() { + val doc = Jsoup.parse( + "" + + "" + + "" + + "" + + "
Header AHeader B
Cell 1Cell 2
Cell 3Cell 4
" + ) + doc.convertTablesToText() + + val code = doc.selectFirst("pre > code") + assertThat(code).isNotNull() + assertThat(code!!.wholeText()).isEqualTo( + "Header A | Header B\n" + + "---------+---------\n" + + "Cell 1 | Cell 2 \n" + + "Cell 3 | Cell 4 " + ) + } + + @Test + fun `table with th in first row and no thead`() { + val doc = Jsoup.parse( + "" + + "" + + "" + + "
NameAge
Alice30
" + ) + doc.convertTablesToText() + + val code = doc.selectFirst("pre > code") + assertThat(code).isNotNull() + assertThat(code!!.wholeText()).isEqualTo( + "Name | Age\n" + + "------+----\n" + + "Alice | 30 " + ) + } + + @Test + fun `unequal column counts - shorter rows padded`() { + val doc = Jsoup.parse( + "" + + "" + + "" + + "
ABC
D
" + ) + doc.convertTablesToText() + + val code = doc.selectFirst("pre > code") + assertThat(code).isNotNull() + assertThat(code!!.wholeText()).isEqualTo( + "A | B | C\n" + + "D | | " + ) + } + + @Test + fun `empty table - no rows`() { + val doc = Jsoup.parse("
") + doc.convertTablesToText() + + // Table should be removed, no pre/code created + assertThat(doc.selectFirst("table")).isNull() + assertThat(doc.selectFirst("pre")).isNull() + } + + @Test + fun `table with surrounding content`() { + val doc = Jsoup.parse( + "

Before

X

After

" + ) + doc.convertTablesToText() + + assertThat(doc.selectFirst("table")).isNull() + assertThat(doc.text()).contains("Before") + assertThat(doc.text()).contains("After") + assertThat(doc.selectFirst("pre > code")).isNotNull() + assertThat(doc.selectFirst("pre > code")!!.text()).isEqualTo("X") + } + + @Test + fun `multiple tables converted independently`() { + val doc = Jsoup.parse( + "
T1
" + + "
T2
" + ) + doc.convertTablesToText() + + val codes = doc.select("pre > code") + assertThat(codes).hasSize(2) + assertThat(codes[0].text()).isEqualTo("T1") + assertThat(codes[1].text()).isEqualTo("T2") + } + + @Test + fun `single column table - no pipes`() { + val doc = Jsoup.parse( + "
Row 1
Row 2
" + ) + doc.convertTablesToText() + + val code = doc.selectFirst("pre > code") + assertThat(code).isNotNull() + assertThat(code!!.wholeText()).isEqualTo( + "Row 1\n" + + "Row 2" + ) + } + + @Test + fun `cell content with extra whitespace is trimmed`() { + val doc = Jsoup.parse( + "
hello world
" + ) + doc.convertTablesToText() + + val code = doc.selectFirst("pre > code") + assertThat(code).isNotNull() + assertThat(code!!.wholeText()).isEqualTo("hello | world") + } + + @Test + fun `integration via toHtmlDocument - table is replaced`() { + val body = FormattedBody( + format = MessageFormat.HTML, + body = "

Info:

" + + "
KeyValue
A1
" + ) + + val document = body.toHtmlDocument(permalinkParser = FakePermalinkParser()) + assertThat(document).isNotNull() + // Table should have been replaced + assertThat(document!!.selectFirst("table")).isNull() + val code = document.selectFirst("pre > code") + assertThat(code).isNotNull() + assertThat(code!!.wholeText()).isEqualTo( + "Key | Value\n" + + "----+------\n" + + "A | 1 " + ) + } +} diff --git a/libraries/matrixui/src/test/kotlin/io/element/android/libraries/matrix/ui/messages/ToPlainTextTest.kt b/libraries/matrixui/src/test/kotlin/io/element/android/libraries/matrix/ui/messages/ToPlainTextTest.kt index 607f8254014..977019e8b4b 100644 --- a/libraries/matrixui/src/test/kotlin/io/element/android/libraries/matrix/ui/messages/ToPlainTextTest.kt +++ b/libraries/matrixui/src/test/kotlin/io/element/android/libraries/matrix/ui/messages/ToPlainTextTest.kt @@ -120,6 +120,22 @@ class ToPlainTextTest { ) } + @Test + fun `FormattedBody toPlainText - converts table to plain text`() { + val formattedBody = FormattedBody( + format = MessageFormat.HTML, + body = "" + + "
NameAge
Alice30
" + ) + val result = formattedBody.toPlainText(permalinkParser = FakePermalinkParser()) + // PlainTextNodeVisitor normalizes whitespace from TextNode.text(), so + // newlines and extra spaces in the pipe table are collapsed. + assertThat(result).contains("Name") + assertThat(result).contains("Age") + assertThat(result).contains("Alice") + assertThat(result).contains("30") + } + @Test fun `TextMessageType toPlainText - returns the markdown body if the formatted one cannot be parsed`() { val messageType = TextMessageType(