Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright (c) 2025 Element Creations Ltd.
*
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial.
* Please see LICENSE files in the repository root for full details.
*/

package io.element.android.libraries.matrix.ui.messages

import org.jsoup.nodes.Document
import org.jsoup.nodes.Element

/**
 * Replaces every `<table>` element of this document, in place, with a
 * `<pre><code>` block holding a pipe-delimited plain-text rendering of the table.
 *
 * Every cell is padded to the width of the widest cell in its column, cells are
 * joined with ` | `, and a `-+-` separator line is emitted after the last header
 * row when the table has header rows. Rows with fewer cells than the widest row
 * are padded with empty cells.
 *
 * Tables that do not contain a single `<th>`/`<td>` cell are removed without a
 * replacement, so that no empty `<pre>` block is left behind.
 */
fun Document.convertTablesToText() {
    // Work on a copy of the match list: the loop removes or replaces the tables it visits.
    val tables = getElementsByTag("table").toList()
    for (table in tables) {
        val rows = extractRows(table)
        // `all` is also true for an empty list, so this covers both "no rows"
        // and "only rows without any cell".
        if (rows.all { it.isEmpty() }) {
            table.remove()
            continue
        }

        // At least one row has a cell here, so the column count is >= 1.
        val columnCount = rows.maxOf { it.size }
        // Pad rows with fewer cells so every row has exactly `columnCount` entries.
        val normalizedRows = rows.map { row ->
            row + List(columnCount - row.size) { "" }
        }

        val headerRowCount = detectHeaderRowCount(table)

        // A column made only of empty cells still gets a width of 1.
        val colWidths = IntArray(columnCount) { col ->
            normalizedRows.maxOf { it[col].length }.coerceAtLeast(1)
        }

        val lines = buildList {
            normalizedRows.forEachIndexed { index, row ->
                add(formatRow(row, colWidths))
                // Row indices start at 0, so this can only match when headerRowCount >= 1.
                if (index + 1 == headerRowCount) {
                    add(formatSeparator(colWidths))
                }
            }
        }

        val code = Element("code")
        code.appendText(lines.joinToString("\n"))
        val pre = Element("pre")
        pre.appendChild(code)
        table.replaceWith(pre)
    }
}

/**
 * Collects the cell texts of every `<tr>` found under [table], one list per row.
 *
 * The rows of the first `<thead>` come first. Every other `<tr>` follows in
 * document order, whichever section it lives in (any `<tbody>`, `<tfoot>`, or
 * directly under the table). Reading only the first `<tbody>` would silently drop
 * the rows of additional `<tbody>` sections and of `<tfoot>`.
 *
 * @return one entry per row; a row without `<th>`/`<td>` children yields an empty list.
 */
private fun extractRows(table: Element): List<List<String>> {
    val thead = table.getElementsByTag("thead").first()
    val headerRows: List<Element> = thead?.getElementsByTag("tr") ?: emptyList()

    // Identity comparison: a row is a header row only if it is the very same node.
    val otherRows = table.getElementsByTag("tr").filter { tr ->
        headerRows.none { it === tr }
    }

    return (headerRows + otherRows).map { tr -> extractCells(tr) }
}

/**
 * Returns the trimmed text of each direct `<th>` or `<td>` child of [tr],
 * in the order the cells appear in the row. Other children are ignored.
 */
private fun extractCells(tr: Element): List<String> = buildList {
    for (child in tr.children()) {
        val tag = child.tagName()
        if (tag == "th" || tag == "td") {
            add(child.text().trim())
        }
    }
}

/**
 * Tells how many leading rows of [table] are header rows.
 *
 * When the table has a `<thead>`, the number of `<tr>` elements inside it is returned.
 * Without a `<thead>`, the first `<tr>` counts as a single header row if it has at
 * least one cell and all of its cells are `<th>`; otherwise there is no header.
 */
private fun detectHeaderRowCount(table: Element): Int {
    val head = table.getElementsByTag("thead").first()
    if (head == null) {
        // No explicit header section: inspect the cells of the very first row.
        val leadingRow = table.getElementsByTag("tr").firstOrNull() ?: return 0
        val cellTags = leadingRow.children()
            .map { it.tagName() }
            .filter { it == "th" || it == "td" }
        val headerOnly = cellTags.isNotEmpty() && cellTags.none { it == "td" }
        return if (headerOnly) 1 else 0
    }
    return head.getElementsByTag("tr").size
}

/**
 * Renders one table row: each cell is right-padded with spaces to the width of
 * its column ([colWidths] at the same index) and the cells are joined with ` | `.
 */
private fun formatRow(cells: List<String>, colWidths: IntArray): String = buildString {
    cells.forEachIndexed { index, cell ->
        if (index > 0) append(" | ")
        append(cell.padEnd(colWidths[index]))
    }
}

/**
 * Builds the line drawn under the header rows: a run of `-` as wide as each
 * column in [colWidths], with `-+-` between neighbouring columns.
 */
private fun formatSeparator(colWidths: IntArray): String =
    colWidths
        .map { width -> "-".repeat(width) }
        .joinToString(separator = "-+-")
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import io.element.android.libraries.matrix.api.permalink.PermalinkParser
import io.element.android.libraries.matrix.api.timeline.item.event.FormattedBody
import io.element.android.libraries.matrix.api.timeline.item.event.MessageFormat
import io.element.android.wysiwyg.utils.HtmlToDomParser
import org.jsoup.Jsoup
import org.jsoup.nodes.Document

/**
Expand All @@ -33,11 +34,12 @@ fun FormattedBody.toHtmlDocument(
// We don't trim the start in case it's used as indentation.
?.trimEnd()
?.let { formattedBody ->
val dom = if (prefix != null) {
HtmlToDomParser.document("$prefix $formattedBody")
} else {
HtmlToDomParser.document(formattedBody)
}
val htmlToProcess = if (prefix != null) "$prefix $formattedBody" else formattedBody

// Convert tables to pre/code blocks before the wysiwyg safelist strips them
val processedHtml = convertTablesInHtml(htmlToProcess)

val dom = HtmlToDomParser.document(processedHtml)

// Prepend `@` to mentions
fixMentions(dom, permalinkParser)
Expand All @@ -46,6 +48,13 @@ fun FormattedBody.toHtmlDocument(
}
}

/**
 * Returns [html] with every `<table>` rendered as a `<pre><code>` text block.
 *
 * When [html] contains no table markup it is returned unchanged, without being
 * parsed. Otherwise the markup is parsed with Jsoup, the tables are converted and
 * the inner HTML of the resulting `<body>` is returned.
 */
private fun convertTablesInHtml(html: String): String {
    // Cheap pre-check so messages without tables skip the extra parse.
    // HTML tag names are case-insensitive, so `<TABLE>` has to match as well.
    if (!html.contains("<table", ignoreCase = true)) return html
    val doc = Jsoup.parse(html)
    doc.convertTablesToText()
    return doc.body().html()
}

private fun fixMentions(
dom: Document,
permalinkParser: PermalinkParser,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
/*
* Copyright (c) 2025 Element Creations Ltd.
*
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial.
* Please see LICENSE files in the repository root for full details.
*/

package io.element.android.libraries.matrix.ui.messages

import com.google.common.truth.Truth.assertThat
import io.element.android.libraries.matrix.api.timeline.item.event.FormattedBody
import io.element.android.libraries.matrix.api.timeline.item.event.MessageFormat
import io.element.android.libraries.matrix.test.permalink.FakePermalinkParser
import org.jsoup.Jsoup
import org.junit.Test
import org.junit.runner.RunWith
import org.robolectric.RobolectricTestRunner

/**
 * Tests for the conversion of HTML tables into pipe-delimited `<pre><code>` text.
 *
 * Expected strings spell out the column padding exactly: every cell, including
 * the last one of a row, is padded with spaces to the width of its column.
 */
@RunWith(RobolectricTestRunner::class)
class HtmlTableToTextTest {
    @Test
    fun `simple 2x2 table without header`() {
        val doc = Jsoup.parse(
            "<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>"
        )
        doc.convertTablesToText()

        val code = doc.selectFirst("pre > code")
        assertThat(code).isNotNull()
        assertThat(code!!.wholeText()).isEqualTo(
            "A | B\n" +
                "C | D"
        )
    }

    @Test
    fun `table with thead header`() {
        val doc = Jsoup.parse(
            "<table>" +
                "<thead><tr><th>Header A</th><th>Header B</th></tr></thead>" +
                "<tbody><tr><td>Cell 1</td><td>Cell 2</td></tr>" +
                "<tr><td>Cell 3</td><td>Cell 4</td></tr></tbody>" +
                "</table>"
        )
        doc.convertTablesToText()

        val code = doc.selectFirst("pre > code")
        assertThat(code).isNotNull()
        // Both columns are 8 wide ("Header A"), so the 6-char cells get 2 spaces of padding.
        assertThat(code!!.wholeText()).isEqualTo(
            "Header A | Header B\n" +
                "---------+---------\n" +
                "Cell 1   | Cell 2  \n" +
                "Cell 3   | Cell 4  "
        )
    }

    @Test
    fun `table with th in first row and no thead`() {
        val doc = Jsoup.parse(
            "<table>" +
                "<tr><th>Name</th><th>Age</th></tr>" +
                "<tr><td>Alice</td><td>30</td></tr>" +
                "</table>"
        )
        doc.convertTablesToText()

        val code = doc.selectFirst("pre > code")
        assertThat(code).isNotNull()
        // Column widths are 5 ("Alice") and 3 ("Age").
        assertThat(code!!.wholeText()).isEqualTo(
            "Name  | Age\n" +
                "------+----\n" +
                "Alice | 30 "
        )
    }

    @Test
    fun `unequal column counts - shorter rows padded`() {
        val doc = Jsoup.parse(
            "<table>" +
                "<tr><td>A</td><td>B</td><td>C</td></tr>" +
                "<tr><td>D</td></tr>" +
                "</table>"
        )
        doc.convertTablesToText()

        val code = doc.selectFirst("pre > code")
        assertThat(code).isNotNull()
        // Missing cells are rendered as empty cells padded to the column width of 1.
        assertThat(code!!.wholeText()).isEqualTo(
            "A | B | C\n" +
                "D |   |  "
        )
    }

    @Test
    fun `empty table - no rows`() {
        val doc = Jsoup.parse("<table></table>")
        doc.convertTablesToText()

        // Table should be removed, no pre/code created
        assertThat(doc.selectFirst("table")).isNull()
        assertThat(doc.selectFirst("pre")).isNull()
    }

    @Test
    fun `table with surrounding content`() {
        val doc = Jsoup.parse(
            "<p>Before</p><table><tr><td>X</td></tr></table><p>After</p>"
        )
        doc.convertTablesToText()

        assertThat(doc.selectFirst("table")).isNull()
        assertThat(doc.text()).contains("Before")
        assertThat(doc.text()).contains("After")
        assertThat(doc.selectFirst("pre > code")).isNotNull()
        assertThat(doc.selectFirst("pre > code")!!.text()).isEqualTo("X")
    }

    @Test
    fun `multiple tables converted independently`() {
        val doc = Jsoup.parse(
            "<table><tr><td>T1</td></tr></table>" +
                "<table><tr><td>T2</td></tr></table>"
        )
        doc.convertTablesToText()

        val codes = doc.select("pre > code")
        assertThat(codes).hasSize(2)
        assertThat(codes[0].text()).isEqualTo("T1")
        assertThat(codes[1].text()).isEqualTo("T2")
    }

    @Test
    fun `single column table - no pipes`() {
        val doc = Jsoup.parse(
            "<table><tr><td>Row 1</td></tr><tr><td>Row 2</td></tr></table>"
        )
        doc.convertTablesToText()

        val code = doc.selectFirst("pre > code")
        assertThat(code).isNotNull()
        assertThat(code!!.wholeText()).isEqualTo(
            "Row 1\n" +
                "Row 2"
        )
    }

    @Test
    fun `cell content with extra whitespace is trimmed`() {
        val doc = Jsoup.parse(
            "<table><tr><td> hello </td><td> world </td></tr></table>"
        )
        doc.convertTablesToText()

        val code = doc.selectFirst("pre > code")
        assertThat(code).isNotNull()
        assertThat(code!!.wholeText()).isEqualTo("hello | world")
    }

    @Test
    fun `integration via toHtmlDocument - table is replaced`() {
        val body = FormattedBody(
            format = MessageFormat.HTML,
            body = "<p>Info:</p><table><thead><tr><th>Key</th><th>Value</th></tr></thead>" +
                "<tbody><tr><td>A</td><td>1</td></tr></tbody></table>"
        )

        val document = body.toHtmlDocument(permalinkParser = FakePermalinkParser())
        assertThat(document).isNotNull()
        // Table should have been replaced
        assertThat(document!!.selectFirst("table")).isNull()
        val code = document.selectFirst("pre > code")
        assertThat(code).isNotNull()
        // Column widths are 3 ("Key") and 5 ("Value").
        assertThat(code!!.wholeText()).isEqualTo(
            "Key | Value\n" +
                "----+------\n" +
                "A   | 1    "
        )
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,22 @@ class ToPlainTextTest {
)
}

@Test
fun `FormattedBody toPlainText - converts table to plain text`() {
    // One header row (Name, Age) followed by one data row (Alice, 30).
    val tableHtml = "<table><thead><tr><th>Name</th><th>Age</th></tr></thead>" +
        "<tbody><tr><td>Alice</td><td>30</td></tr></tbody></table>"
    val plainText = FormattedBody(format = MessageFormat.HTML, body = tableHtml)
        .toPlainText(permalinkParser = FakePermalinkParser())

    // Only the presence of each cell text is asserted, not the table layout:
    // PlainTextNodeVisitor normalizes whitespace from TextNode.text(), so the
    // newlines and padding of the pipe table are collapsed.
    for (cellText in listOf("Name", "Age", "Alice", "30")) {
        assertThat(plainText).contains(cellText)
    }
}

@Test
fun `TextMessageType toPlainText - returns the markdown body if the formatted one cannot be parsed`() {
val messageType = TextMessageType(
Expand Down