Skip to content

Commit 6588f4f

Browse files
gnapse, claude, and rfgamaral
authored
fix(paste-html-table-as-string): preserve surrounding content when pasting HTML tables (#1189)
* fix(paste): preserve surrounding content when pasting HTML tables

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* fix: unwrap paragraphs inside table cells when pasting HTML tables

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Ricardo Amaral <ricardo@doist.com>
1 parent 7f6d3e9 commit 6588f4f

File tree

2 files changed

+131
-45
lines changed

2 files changed

+131
-45
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import { transformPastedHTML } from './paste-html-table-as-string'
2+
3+
describe('Extension: PasteHTMLTableAsString', () => {
    describe('#transformPastedHTML', () => {
        // Table-driven cases: [test title, pasted HTML, expected transformed HTML].
        // Every case checks the exact string returned by `transformPastedHTML`.
        const cases: Array<[string, string, string]> = [
            // Input without any table must come back unchanged
            ['returns original HTML when no tables present', '<p>Hello world</p>', '<p>Hello world</p>'],
            [
                'converts simple table to paragraphs',
                '<table><tr><td>A</td><td>B</td></tr><tr><td>C</td><td>D</td></tr></table>',
                '<p>A B</p><p>C D</p>',
            ],
            [
                'preserves content before table',
                '<p>Before</p><table><tr><td>A</td><td>B</td></tr></table>',
                '<p>Before</p><p>A B</p>',
            ],
            [
                'preserves content after table',
                '<table><tr><td>A</td><td>B</td></tr></table><p>After</p>',
                '<p>A B</p><p>After</p>',
            ],
            [
                'preserves content before and after table',
                '<p>Before</p><table><tr><td>A</td><td>B</td></tr></table><p>After</p>',
                '<p>Before</p><p>A B</p><p>After</p>',
            ],
            [
                'handles multiple tables with text between them',
                '<p>Start</p><table><tr><td>T1</td></tr></table><p>Middle</p><table><tr><td>T2</td></tr></table><p>End</p>',
                '<p>Start</p><p>T1</p><p>Middle</p><p>T2</p><p>End</p>',
            ],
            [
                'filters out empty rows',
                '<table><tr><td>A</td></tr><tr><td> </td></tr><tr><td>B</td></tr></table>',
                '<p>A</p><p>B</p>',
            ],
            [
                'preserves HTML formatting within cells',
                '<table><tr><td><strong>Bold</strong></td><td><em>Italic</em></td></tr></table>',
                '<p><strong>Bold</strong> <em>Italic</em></p>',
            ],
            [
                'handles table with tbody',
                '<table><tbody><tr><td>A</td><td>B</td></tr></tbody></table>',
                '<p>A B</p>',
            ],
            [
                'handles table with thead and tbody',
                '<table><thead><tr><th>H1</th><th>H2</th></tr></thead><tbody><tr><td>A</td><td>B</td></tr></tbody></table>',
                '<p>H1 H2</p><p>A B</p>',
            ],
            [
                'unwraps paragraphs inside cells while preserving inline formatting',
                '<table><tr><td><p><span>A</span></p></td><td><p><strong>B</strong></p></td></tr></table>',
                '<p><span>A</span> <strong>B</strong></p>',
            ],
        ]

        test.each(cases)('%s', (_title, pastedHTML, expectedHTML) => {
            expect(transformPastedHTML(pastedHTML)).toBe(expectedHTML)
        })
    })
})

src/extensions/shared/paste-html-table-as-string.ts

Lines changed: 47 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,51 @@ import { Plugin, PluginKey } from '@tiptap/pm/state'
44
import { PASTE_HTML_TABLE_AS_STRING_EXTENSION_PRIORITY } from '../../constants/extension-priorities'
55
import { parseHtmlToElement } from '../../helpers/dom'
66

/**
 * Transforms pasted HTML by converting every table into a sequence of paragraphs while
 * preserving the content that surrounds the tables.
 *
 * Each table row becomes one `<p>` whose content is the `innerHTML` of the row's cells joined by
 * a single space (inline formatting inside cells is kept). Rows whose joined content is blank are
 * dropped. Paragraphs found inside a cell are unwrapped so the resulting row paragraph does not
 * contain nested `<p>` elements.
 *
 * @param html The HTML string to transform.
 * @returns The original `html` string, untouched, when it contains no `<table>` element;
 * otherwise the serialized markup with every table replaced by its row paragraphs.
 */
function transformPastedHTML(html: string): string {
    const body = parseHtmlToElement(html)

    // `querySelectorAll` lists matches in document order, so a table always precedes the tables
    // nested inside its cells. Walking the list backwards converts nested tables first; when the
    // enclosing table is handled afterwards, its cells already contain paragraphs instead of an
    // unconverted `<table>`.
    const tables = Array.from(body.querySelectorAll('table')).reverse()

    if (tables.length === 0) {
        return html
    }

    for (const table of tables) {
        if (!table.rows) {
            continue
        }

        // Build replacement nodes in the document the parsed table belongs to instead of the
        // global `document`, so this function does not depend on a global.
        // NOTE(review): presumably `parseHtmlToElement` parses into a detached document, in which
        // case this also keeps pasted markup from being assigned to `innerHTML` of an element
        // owned by the live page document — confirm against the helper.
        const ownerDocument = table.ownerDocument

        // Convert table rows to paragraphs (using innerHTML to preserve formatting)
        const rowParagraphs = Array.from(table.rows)
            .map((row) =>
                Array.from(row.cells)
                    .map((cell) => {
                        // Unwrap paragraphs but preserve inline formatting
                        for (const paragraph of Array.from(cell.querySelectorAll('p'))) {
                            const unwrapped: Node[] = Array.from(paragraph.childNodes)

                            // Keep a space where a paragraph boundary used to be, otherwise
                            // `<p>A</p><p>B</p>` would collapse into `AB`
                            if (paragraph.nextSibling !== null) {
                                unwrapped.push(ownerDocument.createTextNode(' '))
                            }

                            paragraph.replaceWith(...unwrapped)
                        }

                        return cell.innerHTML
                    })
                    .join(' '),
            )
            // Discard rows that are completely empty
            .filter((rowHTML) => rowHTML.trim().length > 0)
            // Wrap each row in a paragraph
            .map((rowHTML) => {
                const paragraph = ownerDocument.createElement('p')
                paragraph.innerHTML = rowHTML
                return paragraph
            })

        // With no remaining rows this call has no arguments and simply removes the table
        table.replaceWith(...rowParagraphs)
    }

    return body.innerHTML
}
51+
752
/**
853
* The `PasteHTMLTableAsString` extension adds the ability to paste a table copied from a spreadsheet
954
* web app (e.g., Google Sheets, Microsoft Excel), along with tables rendered by GitHub Flavored
@@ -24,54 +69,11 @@ const PasteHTMLTableAsString = Extension.create({
2469
new Plugin({
2570
key: new PluginKey('pasteHTMLTableAsString'),
2671
props: {
27-
transformPastedHTML(html) {
28-
// Attempt to extract table(s) HTML from the pasted HTML
29-
const tableHTML = html.match(/<table[^>]+>[\s\S]*?<\/table>/gi)
30-
31-
// Do not handle the event if no table HTML was found
32-
if (!tableHTML) {
33-
return html
34-
}
35-
36-
// Concatenate all tables into a single string of paragraphs
37-
return tableHTML.reduce((result, table) => {
38-
const { firstElementChild: tableElement } = parseHtmlToElement(table)
39-
40-
if (
41-
!tableElement ||
42-
!(tableElement instanceof HTMLTableElement) ||
43-
!tableElement.rows
44-
) {
45-
return result
46-
}
47-
48-
// Transform the table element into a string of paragraphs
49-
return (
50-
result +
51-
Array.from(tableElement.rows)
52-
// Join each cell into a single string for each row
53-
.reduce<string[]>((acc, row) => {
54-
return [
55-
...acc,
56-
// Use `innerHTML` instead of `innerText` to preserve
57-
// potential formatting (e.g., GFM) within each cell
58-
Array.from(row.cells)
59-
.map((cell) => cell.innerHTML)
60-
.join(' '),
61-
]
62-
}, [])
63-
// Discard rows that are completely empty
64-
.filter((row) => row.trim().length > 0)
65-
// Wrap each row in a paragraph
66-
.map((row) => `<p>${row}</p>`)
67-
.join('')
68-
)
69-
}, '')
70-
},
72+
transformPastedHTML,
7173
},
7274
}),
7375
]
7476
},
7577
})
7678

77-
export { PasteHTMLTableAsString }
79+
export { PasteHTMLTableAsString, transformPastedHTML }

0 commit comments

Comments
 (0)