66
77 md "github.com/firecrawl/html-to-markdown"
88 "github.com/PuerkitoBio/goquery"
9+ "golang.org/x/net/html"
910)
1011
1112// TableCompat is a compatibility plugin for environments where
@@ -145,17 +146,37 @@ func isHeadingRow(s *goquery.Selection) bool {
145146
146147 isTableOrBody := parent .Is ("table" ) || isFirstTbody (parent )
147148
149+ // Check if every cell is a TH - break early if we find a non-TH
148150 everyTH := true
149- s .Children ().Each (func (i int , s * goquery.Selection ) {
150- if goquery .NodeName (s ) != "th" {
151+ children := s .Children ()
152+ for i := 0 ; i < children .Length (); i ++ {
153+ if goquery .NodeName (children .Eq (i )) != "th" {
151154 everyTH = false
155+ break
152156 }
153- })
157+ }
154158
155- if parent .Children ().First ().IsSelection (s ) && isTableOrBody && everyTH {
156- return true
159+ // Return early when the row cannot be a heading row. The first-child check further down
160+ // compares node pointers directly instead of creating a new Selection with parent.Children().First()
161+ if ! everyTH || ! isTableOrBody {
162+ return false
157163 }
158164
165+ // Check if s is the first element child by comparing nodes directly
166+ if len (s .Nodes ) == 0 || len (parent .Nodes ) == 0 {
167+ return false
168+ }
169+
170+ parentNode := parent .Nodes [0 ]
171+ sNode := s .Nodes [0 ]
172+
173+ // Find the first element child (skip text, comment and other non-element nodes)
174+ for child := parentNode .FirstChild ; child != nil ; child = child .NextSibling {
175+ if child .Type == html .ElementNode {
176+ return child == sNode
177+ }
178+ }
179+
159180 return false
160181}
161182func isFirstTbody (s * goquery.Selection ) bool {
@@ -175,15 +196,26 @@ func getCellContent(content string, s *goquery.Selection) string {
175196 // nested tables not found
176197 content = newLineRe .ReplaceAllString (content , "<br>" )
177198 }
178- index := - 1
179- for i , node := range s .Parent ().Children ().Nodes {
180- if s .IsNodes (node ) {
181- index = i
182- break
199+
200+ // Optimize: Check if this is the first element child by comparing node pointers directly
201+ // instead of linear search through all children
202+ parent := s .Parent ()
203+ isFirst := false
204+ if len (s .Nodes ) > 0 && len (parent .Nodes ) > 0 {
205+ parentNode := parent .Nodes [0 ]
206+ sNode := s .Nodes [0 ]
207+
208+ // Find the first element child (skip text, comment and other non-element nodes)
209+ for child := parentNode .FirstChild ; child != nil ; child = child .NextSibling {
210+ if child .Type == html .ElementNode {
211+ isFirst = (child == sNode )
212+ break
213+ }
183214 }
184215 }
216+
185217 prefix := " "
186- if index == 0 {
218+ if isFirst {
187219 prefix = "| "
188220 }
189221 return prefix + content + " |"
0 commit comments