Skip to content

Commit 75f5d90

Browse files
graycreate and claude authored
feat: add support for additional HTML tags in RichView (#78)
* feat: add support for additional HTML tags in RichView Add support for previously unsupported HTML tags: - Table support (table, thead, tbody, tfoot, tr, th, td) - Strikethrough (del, s, strike) - Underline (u, ins) - Superscript/subscript (sup, sub) - Mark/highlight (mark) - Definition lists (dl, dt, dd) - Semantic elements (abbr, cite, kbd, samp, var, small) - Figure elements (figure, figcaption) - Document structure (address, time, details, summary) - Container elements (article, section, nav, aside, header, footer, main, caption) Also added corresponding tests for all new HTML tags and strikethrough/highlight rendering in MarkdownRenderer. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]> * fix: address Copilot review comments for HTML tag support - Preserve <u> HTML tags for underline instead of converting to _text_ to avoid conflict with italic styling - Preserve <sup>/<sub> HTML tags for superscript/subscript instead of ^/~ markers to avoid conflicts with regular text - Use regex pattern for table separator detection to avoid false positives when cell content contains "---" - Escape pipe characters in table cells to prevent markdown table structure breakage - Remove unused headerRowCount variable and related code - Update tests to reflect new behavior - Add test for pipe escaping in table cells 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]> --------- Co-authored-by: Claude <[email protected]>
1 parent abf298d commit 75f5d90

File tree

3 files changed

+558
-1
lines changed

3 files changed

+558
-1
lines changed

V2er/Sources/RichView/Converters/HTMLToMarkdownConverter.swift

Lines changed: 190 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,117 @@ public class HTMLToMarkdownConverter {
173173
case "hr":
174174
result += "\n---\n"
175175

176+
// Table support
177+
case "table":
178+
result += try convertTable(childElement)
179+
180+
case "thead", "tbody", "tfoot":
181+
// These are handled by table, but if encountered alone, process children
182+
result += try convertElement(childElement)
183+
184+
case "tr", "th", "td":
185+
// These should be handled by table, but if encountered alone, process children
186+
result += try convertElement(childElement)
187+
188+
// Strikethrough
189+
case "del", "s", "strike":
190+
let content = try convertElement(childElement)
191+
result += "~~\(content)~~"
192+
193+
// Underline - no standard markdown, preserve as HTML for custom renderer
194+
case "u", "ins":
195+
let content = try convertElement(childElement)
196+
result += "<u>\(content)</u>"
197+
198+
// Superscript/subscript - preserve as HTML for custom renderer
199+
case "sup":
200+
let content = try convertElement(childElement)
201+
result += "<sup>\(content)</sup>"
202+
203+
case "sub":
204+
let content = try convertElement(childElement)
205+
result += "<sub>\(content)</sub>"
206+
207+
// Mark/highlight - render with markers
208+
case "mark":
209+
let content = try convertElement(childElement)
210+
result += "==\(content)=="
211+
212+
// Definition list
213+
case "dl":
214+
result += try convertDefinitionList(childElement)
215+
216+
case "dt":
217+
let content = try convertElement(childElement)
218+
result += "\n**\(content)**\n"
219+
220+
case "dd":
221+
let content = try convertElement(childElement)
222+
result += ": \(content)\n"
223+
224+
// Abbreviation - just show the text with title
225+
case "abbr":
226+
let content = try convertElement(childElement)
227+
if let title = try? childElement.attr("title"), !title.isEmpty {
228+
result += "\(content) (\(title))"
229+
} else {
230+
result += content
231+
}
232+
233+
// Citation
234+
case "cite":
235+
let content = try convertElement(childElement)
236+
result += "*\(content)*"
237+
238+
// Keyboard input
239+
case "kbd":
240+
let content = try convertElement(childElement)
241+
result += "`\(content)`"
242+
243+
// Sample output
244+
case "samp":
245+
let content = try convertElement(childElement)
246+
result += "`\(content)`"
247+
248+
// Variable
249+
case "var":
250+
let content = try convertElement(childElement)
251+
result += "*\(content)*"
252+
253+
// Small text
254+
case "small":
255+
let content = try convertElement(childElement)
256+
result += content
257+
258+
// Figure and figcaption
259+
case "figure":
260+
result += try convertElement(childElement)
261+
262+
case "figcaption":
263+
let content = try convertElement(childElement)
264+
result += "\n*\(content)*\n"
265+
266+
// Address
267+
case "address":
268+
let content = try convertElement(childElement)
269+
result += "\n*\(content)*\n"
270+
271+
// Time - just show the text
272+
case "time":
273+
let content = try convertElement(childElement)
274+
result += content
275+
276+
// Details/summary - collapsible sections
277+
case "details":
278+
result += try convertElement(childElement)
279+
280+
case "summary":
281+
let content = try convertElement(childElement)
282+
result += "\n**\(content)**\n"
283+
176284
// Container elements - just process children
177-
case "div", "span", "body", "html":
285+
case "div", "span", "body", "html", "article", "section", "nav", "aside",
286+
"header", "footer", "main", "caption":
178287
result += try convertElement(childElement)
179288

180289
default:
@@ -212,6 +321,86 @@ public class HTMLToMarkdownConverter {
212321
return result
213322
}
214323

324+
/// Convert an HTML table element into a Markdown pipe table.
///
/// Every `<tr>` that belongs to this table contributes one row made of its direct
/// `<th>`/`<td>` children. Rows of a table nested inside a cell are skipped here,
/// because the nested table is already converted as part of that cell's content;
/// collecting them again would emit them twice. The first row is treated as the
/// header and is followed by a `---` delimiter row. Rows with fewer cells than the
/// widest row are padded with empty cells.
///
/// - Parameter element: The table element to convert.
/// - Returns: The Markdown table wrapped in newlines, or an empty string when the
///   table contains no cells.
/// - Throws: Any error raised while selecting rows or converting cell contents.
private func convertTable(_ element: Element) throws -> String {
    var rows: [[String]] = []

    let allRows = try element.select("tr")

    for row in allRows {
        // Walk up to the nearest enclosing table (or to `element` itself) and keep
        // the row only when that owner is the table being converted.
        var owner = row.parent()
        while let current = owner,
              current !== element,
              current.tagName().lowercased() != "table" {
            owner = current.parent()
        }
        guard owner === element else { continue }

        var cells: [String] = []

        for cell in row.children() {
            let tagName = cell.tagName().lowercased()
            guard tagName == "th" || tagName == "td" else { continue }

            // A table row must stay on one line, and literal pipes must not be
            // mistaken for column separators.
            let content = try convertElement(cell)
                .replacingOccurrences(of: "\n", with: " ")
                .replacingOccurrences(of: "|", with: "\\|")
                .trimmingCharacters(in: .whitespaces)
            cells.append(content)
        }

        if !cells.isEmpty {
            rows.append(cells)
        }
    }

    // The widest row decides how many columns the table has.
    let columnCount = rows.map(\.count).max() ?? 0
    guard columnCount > 0 else { return "" }

    let delimiterRow = "| " + Array(repeating: "---", count: columnCount).joined(separator: " | ") + " |"

    var lines: [String] = []
    for (index, row) in rows.enumerated() {
        // Pad short rows so every line has the same number of cells.
        let padded = row + Array(repeating: "", count: columnCount - row.count)
        lines.append("| " + padded.joined(separator: " | ") + " |")

        // The delimiter row goes directly after the header row.
        if index == 0 {
            lines.append(delimiterRow)
        }
    }

    return "\n" + lines.joined(separator: "\n") + "\n\n"
}
381+
382+
/// Convert a definition list element into Markdown.
///
/// Each `<dt>` child becomes a bold line of its own, each `<dd>` child becomes a
/// line prefixed with `": "`, and any other child is appended as converted.
///
/// - Parameter element: The definition list element whose children are converted.
/// - Returns: The converted children, preceded and followed by a newline.
/// - Throws: Any error raised while converting a child element.
private func convertDefinitionList(_ element: Element) throws -> String {
    var markdown = "\n"

    for item in element.children() {
        let body = try convertElement(item)
        let tag = item.tagName().lowercased()

        if tag == "dt" {
            // Term: bold, on its own line.
            markdown += "\n**\(body)**\n"
        } else if tag == "dd" {
            // Description: colon-prefixed line under the term.
            markdown += ": \(body)\n"
        } else {
            markdown += body
        }
    }

    return markdown + "\n"
}
403+
215404
/// Escape special Markdown characters
216405
private func escapeMarkdown(_ text: String) -> String {
217406
// Only escape characters that would cause markdown parsing issues

V2er/Sources/RichView/Renderers/MarkdownRenderer.swift

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ public class MarkdownRenderer {
8080
} else if line.starts(with: "---") {
8181
// Horizontal rule
8282
attributedString.append(AttributedString("—————————————\n"))
83+
} else if line.starts(with: "|") && line.hasSuffix("|") {
84+
// Markdown table
85+
let (tableBlock, linesConsumed) = extractTableBlock(lines, startIndex: index)
86+
attributedString.append(renderTable(tableBlock))
87+
index += linesConsumed
88+
continue
8389
} else {
8490
// Regular paragraph with inline formatting
8591
attributedString.append(renderInlineMarkdown(line))
@@ -296,6 +302,46 @@ public class MarkdownRenderer {
296302
continue
297303
}
298304

305+
// Check for strikethrough
306+
if let strikeMatch = currentText.firstMatch(of: /~~(.+?)~~/) {
307+
// Add text before strikethrough
308+
let beforeRange = currentText.startIndex..<strikeMatch.range.lowerBound
309+
if !beforeRange.isEmpty {
310+
result.append(renderPlainText(String(currentText[beforeRange])))
311+
}
312+
313+
// Add strikethrough text
314+
var strikeText = AttributedString(String(strikeMatch.1))
315+
strikeText.font = .system(size: stylesheet.body.fontSize)
316+
strikeText.foregroundColor = stylesheet.body.color.uiColor
317+
strikeText.strikethroughStyle = .single
318+
result.append(strikeText)
319+
320+
// Continue with remaining text
321+
currentText = String(currentText[strikeMatch.range.upperBound...])
322+
continue
323+
}
324+
325+
// Check for highlight/mark
326+
if let highlightMatch = currentText.firstMatch(of: /==(.+?)==/) {
327+
// Add text before highlight
328+
let beforeRange = currentText.startIndex..<highlightMatch.range.lowerBound
329+
if !beforeRange.isEmpty {
330+
result.append(renderPlainText(String(currentText[beforeRange])))
331+
}
332+
333+
// Add highlighted text
334+
var highlightText = AttributedString(String(highlightMatch.1))
335+
highlightText.font = .system(size: stylesheet.body.fontSize)
336+
highlightText.foregroundColor = stylesheet.body.color.uiColor
337+
highlightText.backgroundColor = Color.yellow.opacity(0.3)
338+
result.append(highlightText)
339+
340+
// Continue with remaining text
341+
currentText = String(currentText[highlightMatch.range.upperBound...])
342+
continue
343+
}
344+
299345
// No more special elements, add remaining text
300346
result.append(renderPlainText(currentText))
301347
break
@@ -322,4 +368,94 @@ public class MarkdownRenderer {
322368
let content = String(match.2)
323369
return (number, content)
324370
}
371+
372+
// MARK: - Table Rendering
373+
374+
/// Extract the run of consecutive Markdown table lines that begins at `startIndex`.
///
/// A line belongs to the table while it both starts and ends with `|`. The
/// delimiter row (`| --- | :-: |`) is skipped once, and only when it appears before
/// the second data row, so a later body row that happens to contain only dashes is
/// kept as data. Cells are split on unescaped pipes and `\|` is turned back into a
/// literal `|`.
///
/// - Parameters:
///   - lines: The lines being rendered.
///   - startIndex: Index of the first line of the table.
/// - Returns: The parsed rows (trimmed cell strings) and the number of lines
///   consumed, including a skipped delimiter row.
private func extractTableBlock(_ lines: [String], startIndex: Int) -> ([[String]], Int) {
    guard lines.indices.contains(startIndex) else { return ([], 0) }

    var rows: [[String]] = []
    var index = startIndex
    var hasSkippedDelimiter = false

    while index < lines.count {
        let line = lines[index]

        // Stop at the first line that is not shaped like a table row.
        guard line.starts(with: "|") && line.hasSuffix("|") else {
            break
        }

        // Skip the delimiter row (| --- | --- | or with colons for alignment).
        let looksLikeDelimiter = line.range(
            of: #"^\|\s*(:?-+:?)\s*(\|\s*(:?-+:?)\s*)*\|$"#,
            options: .regularExpression
        ) != nil
        if looksLikeDelimiter && !hasSkippedDelimiter && rows.count <= 1 {
            hasSkippedDelimiter = true
            index += 1
            continue
        }

        rows.append(splitTableRow(line))
        index += 1
    }

    return (rows, index - startIndex)
}

/// Split one table line into trimmed cells, honouring `\|` as an escaped pipe.
private func splitTableRow(_ line: String) -> [String] {
    // Remove exactly one border pipe per side so empty edge cells survive.
    var body = Substring(line)
    if body.hasPrefix("|") {
        body = body.dropFirst()
    }
    if body.hasSuffix("|") && !body.hasSuffix("\\|") {
        body = body.dropLast()
    }

    var cells: [String] = []
    var current = ""
    var isEscaped = false

    for character in body {
        if isEscaped {
            // Only `\|` is unescaped here; every other backslash sequence is kept.
            if character != "|" {
                current.append("\\")
            }
            current.append(character)
            isEscaped = false
        } else if character == "\\" {
            isEscaped = true
        } else if character == "|" {
            cells.append(current.trimmingCharacters(in: .whitespaces))
            current = ""
        } else {
            current.append(character)
        }
    }

    // A dangling backslash at the end of the line is ordinary text.
    if isEscaped {
        current.append("\\")
    }
    cells.append(current.trimmingCharacters(in: .whitespaces))

    return cells
}
408+
409+
/// Render parsed table rows as an `AttributedString`.
///
/// Each row is written on its own line. Every cell is passed through
/// `renderInlineMarkdown`; cells of the first row additionally get a semibold
/// system font at the body font size. Neighbouring cells are divided by a gray
/// vertical bar, and when the table has more than one row a gray horizontal rule
/// is placed under the first row.
///
/// - Parameter rows: Table rows, each an array of cell strings.
/// - Returns: The rendered table surrounded by newlines, or an empty
///   `AttributedString` when there are no cells at all.
private func renderTable(_ rows: [[String]]) -> AttributedString {
    // Nothing to draw when there are no rows or every row is empty.
    guard rows.contains(where: { !$0.isEmpty }) else { return AttributedString() }

    var result = AttributedString("\n")

    for (rowIndex, row) in rows.enumerated() {
        let isHeaderRow = rowIndex == 0

        for (cellIndex, cell) in row.enumerated() {
            var cellText = renderInlineMarkdown(cell)

            // The first row is the header.
            if isHeaderRow {
                cellText.font = .system(size: stylesheet.body.fontSize, weight: .semibold)
            }

            result.append(cellText)

            // Visible divider between neighbouring cells; without one the cell
            // texts run together.
            if cellIndex < row.count - 1 {
                var separator = AttributedString(" │ ")
                separator.foregroundColor = Color.gray.opacity(0.5)
                result.append(separator)
            }
        }

        result.append(AttributedString("\n"))

        // Rule under the header, only when there is a body to separate it from.
        if isHeaderRow && rows.count > 1 {
            var separatorLine = AttributedString(String(repeating: "─", count: 40) + "\n")
            separatorLine.foregroundColor = Color.gray.opacity(0.3)
            result.append(separatorLine)
        }
    }

    result.append(AttributedString("\n"))
    return result
}
325461
}

0 commit comments

Comments
 (0)