@@ -84,6 +84,24 @@ const voidElements: Record<string, boolean> = {
8484 'wbr' : true
8585} ;
8686
/**
 * Quick lookup table of table-structure elements that cannot directly
 * contain text.
 *
 * Per the HTML spec content models:
 * - <table> can only contain: caption, colgroup, thead, tbody, tfoot, tr
 * - <thead>, <tbody>, <tfoot> can only contain: tr
 * - <tr> can only contain: td, th
 * - <colgroup> can only contain: col
 *
 * Only the container elements listed on the left-hand side above are keys
 * here. <caption>, <td> and <th> are intentionally absent because they may
 * hold text, and <col> is absent because it has no children at all.
 *
 * Look entries up with `tableElements[name] === true`; names that are not
 * keys of this table yield `undefined`.
 */
const tableElements: Record<string, boolean> = {
  'table': true,
  'thead': true,
  'tbody': true,
  'tfoot': true,
  'tr': true,
  'colgroup': true,
};
104+
87105/**
88106 * Checks if a node is nested inside a <pre> element.
89107 * In <pre> elements, whitespace must be preserved per HTML spec.
@@ -107,6 +125,19 @@ function isInsidePreElement(node: HtmlNode | undefined): boolean {
107125 return false ;
108126}
109127
/**
 * Checks if a node is a table element, i.e. one of the elements listed in
 * the `tableElements` lookup table.
 *
 * @param node The node to check (typically the parent of a text node)
 * @returns true if `node` has a name that is a key of `tableElements`;
 *   false if `node` is undefined, has no name, or has any other name
 */
function isTableElement(node: HtmlNode | undefined): boolean {
  // Nodes without a name (or a missing node) can never be table elements.
  if (!node || !node.name) {
    return false;
  }
  // Strict comparison against `true` so that names which happen to match
  // inherited Object.prototype members (e.g. "constructor") are rejected.
  return tableElements[node.name] === true;
}
140+
110141/**
111142 * Parses a single HTML tag.
112143 *
@@ -321,14 +352,22 @@ export function parseHtml(html: string): Array<HtmlNode> {
321352 nextChar &&
322353 nextChar !== '<'
323354 ) {
324- // This is a text node; add it as a child node
325- if ( current . children === undefined ) {
326- current . children = [ ] ;
355+ const textContent = html . slice ( start , html . indexOf ( '<' , start ) ) ;
356+ const isWhitespace = whitespaceRE . test ( textContent ) ;
357+ // Check if we are in a table element context with whitespace-only text node
358+ const whitespaceInTable = isWhitespace && isTableElement ( current ) ;
359+
360+ // Don't add whitespace-only text nodes if they are inside table elements
361+ if ( ! whitespaceInTable ) {
362+ // This is a text node; add it as a child node
363+ if ( current . children === undefined ) {
364+ current . children = [ ] ;
365+ }
366+ current . children . push ( {
367+ type : 'text' ,
368+ content : decode ( textContent ) ,
369+ } ) ;
327370 }
328- current . children . push ( {
329- type : 'text' ,
330- content : decode ( html . slice ( start , html . indexOf ( '<' , start ) ) ) ,
331- } ) ;
332371 }
333372
334373 // if we're at root, push new base node
@@ -383,11 +422,14 @@ export function parseHtml(html: string): Array<HtmlNode> {
383422 content = ' ' ;
384423 }
385424
386- // Don't add whitespace-only text nodes if they would be trailing text nodes
387- // or if they would be leading whitespace-only text nodes:
425+ // Check if we are in a table element context with whitespace-only text node
426+ const whitespaceInTable = whitespaceRE . test ( content ) && isTableElement ( current ) ;
427+
428+ // Don't add whitespace-only text nodes if they would be: trailing text nodes
429+ // leading whitespace-only text nodes, or inside table elements:
388430 // * end > -1 indicates this is not a trailing text node
389431 // * leading node is when level is -1 and parent has length 0
390- if ( ( end > - 1 && level + parent . length >= 0 ) || content !== ' ' ) {
432+ if ( ! whitespaceInTable && ( ( end > - 1 && level + parent . length >= 0 ) || content !== ' ' ) ) {
391433 parent . push ( {
392434 type : 'text' ,
393435 parent : current ,
0 commit comments