Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 196 additions & 1 deletion V2er/Sources/RichView/Converters/HTMLToMarkdownConverter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,117 @@ public class HTMLToMarkdownConverter {
case "hr":
result += "\n---\n"

// Table support
case "table":
result += try convertTable(childElement)

case "thead", "tbody", "tfoot":
// These are handled by table, but if encountered alone, process children
result += try convertElement(childElement)

case "tr", "th", "td":
// These should be handled by table, but if encountered alone, process children
result += try convertElement(childElement)

// Strikethrough
case "del", "s", "strike":
let content = try convertElement(childElement)
result += "~~\(content)~~"

// Underline - no standard markdown, render as emphasized text
case "u", "ins":
let content = try convertElement(childElement)
result += "_\(content)_"
Copy link

Copilot AI Dec 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The underline HTML tags (<u> and <ins>) are converted to markdown format _text_, but the MarkdownRenderer does not have logic to render this format with actual underline styling. The underscore format will be interpreted as italic (similar to *text*), not underline.

Consider either:

  1. Adding underline rendering support in renderInlineMarkdown() method to handle _text_ with underlineStyle = .single
  2. Using a different markdown format for underline that doesn't conflict with italic (e.g., <u>text</u> or custom markers)
Suggested change
// Underline - no standard markdown, render as emphasized text
case "u", "ins":
let content = try convertElement(childElement)
result += "_\(content)_"
// Underline - no standard markdown, preserve as HTML <u> for custom renderer
case "u", "ins":
let content = try convertElement(childElement)
result += "<u>\(content)</u>"

Copilot uses AI. Check for mistakes.

// Superscript/subscript - render with markers
case "sup":
let content = try convertElement(childElement)
result += "^\(content)"

case "sub":
let content = try convertElement(childElement)
result += "~\(content)"
Copy link

Copilot AI Dec 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Superscript and subscript HTML tags are converted to markdown format (^text and ~text), but the MarkdownRenderer does not have logic to render these formats. These will be rendered as plain text without any special styling.

Additionally, the subscript marker ~ could conflict with regular tildes in text (e.g., "~5 minutes ago"). Consider:

  1. Adding rendering support in renderInlineMarkdown() for ^ and ~ markers
  2. Using a more distinctive marker that's less likely to appear in regular text (e.g., ^{text} or HTML-style <sup>text</sup>)
Suggested change
// Superscript/subscript - render with markers
case "sup":
let content = try convertElement(childElement)
result += "^\(content)"
case "sub":
let content = try convertElement(childElement)
result += "~\(content)"
// Superscript/subscript - render as HTML tags to preserve formatting
case "sup":
let content = try convertElement(childElement)
result += "<sup>\(content)</sup>"
case "sub":
let content = try convertElement(childElement)
result += "<sub>\(content)</sub>"

Copilot uses AI. Check for mistakes.

// Mark/highlight - render with markers
case "mark":
let content = try convertElement(childElement)
result += "==\(content)=="

// Definition list
case "dl":
result += try convertDefinitionList(childElement)

case "dt":
let content = try convertElement(childElement)
result += "\n**\(content)**\n"

case "dd":
let content = try convertElement(childElement)
result += ": \(content)\n"

// Abbreviation - just show the text with title
case "abbr":
let content = try convertElement(childElement)
if let title = try? childElement.attr("title"), !title.isEmpty {
result += "\(content) (\(title))"
} else {
result += content
}

// Citation
case "cite":
let content = try convertElement(childElement)
result += "*\(content)*"

// Keyboard input
case "kbd":
let content = try convertElement(childElement)
result += "`\(content)`"

// Sample output
case "samp":
let content = try convertElement(childElement)
result += "`\(content)`"

// Variable
case "var":
let content = try convertElement(childElement)
result += "*\(content)*"

// Small text
case "small":
let content = try convertElement(childElement)
result += content

// Figure and figcaption
case "figure":
result += try convertElement(childElement)

case "figcaption":
let content = try convertElement(childElement)
result += "\n*\(content)*\n"

// Address
case "address":
let content = try convertElement(childElement)
result += "\n*\(content)*\n"

// Time - just show the text
case "time":
let content = try convertElement(childElement)
result += content

// Details/summary - collapsible sections
case "details":
result += try convertElement(childElement)

case "summary":
let content = try convertElement(childElement)
result += "\n**\(content)**\n"

// Container elements - just process children
case "div", "span", "body", "html":
case "div", "span", "body", "html", "article", "section", "nav", "aside",
"header", "footer", "main", "caption":
result += try convertElement(childElement)

default:
Expand Down Expand Up @@ -212,6 +321,92 @@ public class HTMLToMarkdownConverter {
return result
}

/// Convert an HTML table element to a Markdown pipe table.
///
/// Every `tr` matched by `element.select("tr")` becomes one table row; only
/// the row's direct `th`/`td` children contribute cells. The first collected
/// row is always emitted as the header row and is followed by a `---`
/// separator row, because a Markdown table needs a delimiter after its first
/// row whether or not the HTML marked that row as a header. Rows that are
/// shorter than the widest row are padded with empty cells.
///
/// - Parameter element: The table element to convert.
/// - Returns: The Markdown table surrounded by newlines, or an empty string
///   when no cells were found.
/// - Throws: Any error thrown by `select` or `convertElement`.
private func convertTable(_ element: Element) throws -> String {
    var rows: [[String]] = []

    let allRows = try element.select("tr")
    for row in allRows {
        var cells: [String] = []

        for cell in row.children() {
            let tagName = cell.tagName().lowercased()
            guard tagName == "th" || tagName == "td" else { continue }

            // A table row must stay on one line, and a literal pipe inside a
            // cell would otherwise be read as a column delimiter.
            // NOTE(review): assumes convertElement does not already escape
            // pipes; confirm against escapeMarkdown to avoid double escaping.
            let content = try convertElement(cell)
                .replacingOccurrences(of: "\n", with: " ")
                .replacingOccurrences(of: "|", with: "\\|")
                .trimmingCharacters(in: .whitespaces)
            cells.append(content)
        }

        if !cells.isEmpty {
            rows.append(cells)
        }
    }

    // The widest row decides the column count; zero means there is no table.
    let columnCount = rows.map { $0.count }.max() ?? 0
    guard columnCount > 0 else { return "" }

    var result = "\n"
    for (index, row) in rows.enumerated() {
        // Pad short rows so every line has the same number of columns.
        let padded = row + Array(repeating: "", count: columnCount - row.count)
        result += "| " + padded.joined(separator: " | ") + " |\n"

        // Delimiter row directly after the first (header) row.
        if index == 0 {
            let separator = Array(repeating: "---", count: columnCount)
            result += "| " + separator.joined(separator: " | ") + " |\n"
        }
    }

    result += "\n"
    return result
}

/// Convert a definition list element to Markdown.
///
/// Each direct child is converted with `convertElement`. A `dt` child is
/// emitted as a bold term on its own line, a `dd` child as a line prefixed
/// with ": ", and any other child is appended as converted.
///
/// - Parameter element: The definition list element whose children are converted.
/// - Returns: The concatenated entries with a newline before and after.
/// - Throws: Any error thrown by `convertElement`.
private func convertDefinitionList(_ element: Element) throws -> String {
    let entries: [String] = try element.children().map { entry -> String in
        let kind = entry.tagName().lowercased()
        let rendered = try convertElement(entry)

        if kind == "dt" {
            return "\n**\(rendered)**\n"
        }
        if kind == "dd" {
            return ": \(rendered)\n"
        }
        return rendered
    }

    return "\n" + entries.joined() + "\n"
}

/// Escape special Markdown characters
private func escapeMarkdown(_ text: String) -> String {
// Only escape characters that would cause markdown parsing issues
Expand Down
136 changes: 136 additions & 0 deletions V2er/Sources/RichView/Renderers/MarkdownRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ public class MarkdownRenderer {
} else if line.starts(with: "---") {
// Horizontal rule
attributedString.append(AttributedString("—————————————\n"))
} else if line.starts(with: "|") && line.hasSuffix("|") {
// Markdown table
let (tableBlock, linesConsumed) = extractTableBlock(lines, startIndex: index)
attributedString.append(renderTable(tableBlock))
index += linesConsumed
continue
} else {
// Regular paragraph with inline formatting
attributedString.append(renderInlineMarkdown(line))
Expand Down Expand Up @@ -296,6 +302,46 @@ public class MarkdownRenderer {
continue
}

// Check for strikethrough
if let strikeMatch = currentText.firstMatch(of: /~~(.+?)~~/) {
// Add text before strikethrough
let beforeRange = currentText.startIndex..<strikeMatch.range.lowerBound
if !beforeRange.isEmpty {
result.append(renderPlainText(String(currentText[beforeRange])))
}

// Add strikethrough text
var strikeText = AttributedString(String(strikeMatch.1))
strikeText.font = .system(size: stylesheet.body.fontSize)
strikeText.foregroundColor = stylesheet.body.color.uiColor
strikeText.strikethroughStyle = .single
result.append(strikeText)

// Continue with remaining text
currentText = String(currentText[strikeMatch.range.upperBound...])
continue
}

// Check for highlight/mark
if let highlightMatch = currentText.firstMatch(of: /==(.+?)==/) {
// Add text before highlight
let beforeRange = currentText.startIndex..<highlightMatch.range.lowerBound
if !beforeRange.isEmpty {
result.append(renderPlainText(String(currentText[beforeRange])))
}

// Add highlighted text
var highlightText = AttributedString(String(highlightMatch.1))
highlightText.font = .system(size: stylesheet.body.fontSize)
highlightText.foregroundColor = stylesheet.body.color.uiColor
highlightText.backgroundColor = Color.yellow.opacity(0.3)
result.append(highlightText)

// Continue with remaining text
currentText = String(currentText[highlightMatch.range.upperBound...])
continue
}

// No more special elements, add remaining text
result.append(renderPlainText(currentText))
break
Expand All @@ -322,4 +368,94 @@ public class MarkdownRenderer {
let content = String(match.2)
return (number, content)
}

// MARK: - Table Rendering

/// Extract a contiguous block of Markdown table rows from `lines`.
///
/// Scanning starts at `startIndex` and stops at the first line that does not
/// both start and end with a pipe. Delimiter rows (every cell is at least
/// three hyphens, optionally with a leading and/or trailing colon) are
/// consumed but not returned. A pipe preceded by a backslash is treated as
/// literal cell text and returned without the backslash.
///
/// - Parameters:
///   - lines: All lines of the document being rendered.
///   - startIndex: Index of the first line to examine.
/// - Returns: The parsed rows (each an array of whitespace-trimmed cells) and
///   the number of lines consumed, including skipped delimiter rows.
private func extractTableBlock(_ lines: [String], startIndex: Int) -> ([[String]], Int) {
    /// Split one table line into trimmed cells, honoring `\|` escapes.
    func splitCells(_ line: String) -> [String] {
        var cells: [String] = []
        var current = ""
        var pendingBackslash = false

        // Drop the opening pipe; every later unescaped pipe closes a cell.
        for character in line.dropFirst() {
            if pendingBackslash {
                pendingBackslash = false
                if character == "|" {
                    current.append("|")
                    continue
                }
                // The backslash did not escape a pipe, so keep it verbatim.
                current.append("\\")
            }

            switch character {
            case "\\":
                pendingBackslash = true
            case "|":
                cells.append(current)
                current = ""
            default:
                current.append(character)
            }
        }
        if pendingBackslash {
            current.append("\\")
        }

        // Text is left over only when the final pipe was escaped; a bare "|"
        // line still yields a single empty cell.
        let hasLeftover = !current.trimmingCharacters(in: .whitespaces).isEmpty
        if hasLeftover || cells.isEmpty {
            cells.append(current)
        }

        return cells.map { $0.trimmingCharacters(in: .whitespaces) }
    }

    /// True when a cell looks like `---`, `:---`, `---:` or `:---:`.
    func isSeparatorCell(_ cell: String) -> Bool {
        var dashes = cell[...]
        if dashes.hasPrefix(":") { dashes = dashes.dropFirst() }
        if dashes.hasSuffix(":") { dashes = dashes.dropLast() }
        return dashes.count >= 3 && dashes.allSatisfy { $0 == "-" }
    }

    var rows: [[String]] = []
    var index = startIndex

    while index < lines.count {
        let line = lines[index]

        // A table row must be delimited by pipes on both ends.
        guard line.starts(with: "|") && line.hasSuffix("|") else {
            break
        }

        // Only a row made entirely of delimiter cells is skipped, so data
        // such as "2020---2025" is kept.
        let cells = splitCells(line)
        if !cells.allSatisfy(isSeparatorCell) {
            rows.append(cells)
        }

        index += 1
    }

    return (rows, index - startIndex)
}

/// Render parsed table rows as an attributed string.
///
/// Cells within a row are joined by a gray " │ " divider and every row ends
/// with a newline. The first row is treated as the header: its cells get a
/// semibold font at the body size and, when more rows follow, a rule of 40
/// "─" characters is inserted beneath it.
///
/// - Parameter rows: Table rows, each an array of cell strings that are
///   passed through `renderInlineMarkdown`.
/// - Returns: The rendered table with a newline before and after, or an empty
///   attributed string when `rows` contains no cells.
private func renderTable(_ rows: [[String]]) -> AttributedString {
    // Nothing to draw when there are no rows or every row is empty.
    guard rows.contains(where: { !$0.isEmpty }) else { return AttributedString() }

    var result = AttributedString("\n")

    for (rowIndex, row) in rows.enumerated() {
        let isHeader = rowIndex == 0

        for (cellIndex, cell) in row.enumerated() {
            var cellText = renderInlineMarkdown(cell)

            // Header cells are set in semibold at the body size.
            if isHeader {
                cellText.font = .system(size: stylesheet.body.fontSize, weight: .semibold)
            }

            result.append(cellText)

            // Divider between cells, but not after the last one.
            if cellIndex < row.count - 1 {
                var separator = AttributedString(" │ ")
                separator.foregroundColor = Color.gray.opacity(0.5)
                result.append(separator)
            }
        }

        result.append(AttributedString("\n"))

        // Rule under the header, only when body rows follow.
        if isHeader && rows.count > 1 {
            var separatorLine = AttributedString(String(repeating: "─", count: 40) + "\n")
            separatorLine.foregroundColor = Color.gray.opacity(0.3)
            result.append(separatorLine)
        }
    }

    result.append(AttributedString("\n"))
    return result
}
}
Loading
Loading