@@ -78,6 +78,7 @@ function domToMarkdownChildren(childNodes, settings, depth = 0) {
7878 * @property {number } maxDepth - Maximum depth to traverse
7979 * @property {string } excludeSelectors - CSS selectors to exclude from processing
8080 * @property {boolean } includeIframes - Whether to include iframe content
81+ * @property {boolean } trimBlankLinks - Whether to drop links whose text is empty once whitespace is collapsed and trimmed
8182 */
8283
8384/**
@@ -127,12 +128,15 @@ function domToMarkdown(node, settings, depth = 0) {
127128 return `${ children } \n` ;
128129 case 'br' :
129130 return `\n` ;
131+ case 'img' :
132+ return `\n } )\n` ;
130133 case 'ul' :
134+ case 'ol' :
131135 return `\n${ children } \n` ;
132136 case 'li' :
133- return `\n- ${ children . trim ( ) } \n` ;
137+ return `\n- ${ collapseAndTrim ( children ) } \n` ;
134138 case 'a' :
135- return getLinkText ( node ) ;
139+ return getLinkText ( node , children , settings ) ;
136140 case 'iframe' : {
137141 if ( ! settings . includeIframes ) {
138142 return children ;
@@ -155,9 +159,13 @@ function collapseAndTrim(str) {
155159 return collapseWhitespace ( str ) . trim ( ) ;
156160}
157161
/**
 * Builds the Markdown text for an anchor element.
 *
 * This span is shown as a diff: lines marked with - are the previous
 * implementation, which read node.textContent directly; lines marked with +
 * are the replacement, which works from the content passed in as children.
 *
 * @param node - Anchor element; only its href attribute is read here.
 * @param children - Content already produced for the child nodes of the link
 *   (presumably a string, since it is handed to collapseAndTrim and
 *   collapseWhitespace; confirm against the caller).
 * @param settings - Conversion settings; only trimBlankLinks is read here.
 * @returns An empty string when trimBlankLinks is set and the trimmed content
 *   is empty; otherwise a Markdown-style link built from the trimmed content
 *   and the href when the href is truthy, or the whitespace-collapsed
 *   children when it is not.
 */
158- function getLinkText ( node ) {
162+ function getLinkText ( node , children , settings ) {
159163 const href = node . getAttribute ( 'href' ) ;
160- return href ? `[${ collapseAndTrim ( node . textContent ) } ](${ href } )` : collapseWhitespace ( node . textContent ) ;
// Collapse and trim once so the same value feeds both the blank check and the link text.
164+ const trimmedContent = collapseAndTrim ( children ) ;
// The blank check runs before the href check, so an empty anchor is dropped whether or not it has an href.
165+ if ( settings . trimBlankLinks && trimmedContent . length === 0 ) {
166+ return '' ;
167+ }
// Without an href there is no link target, so only the collapsed (untrimmed) content is returned.
168+ return href ? `[${ trimmedContent } ](${ href } )` : collapseWhitespace ( children ) ;
161169}
162170
163171export default class PageContext extends ContentFeature {
@@ -420,6 +428,7 @@ export default class PageContext extends ContentFeature {
420428 const maxDepth = this . getFeatureSetting ( 'maxDepth' ) || 5000 ;
421429 let excludeSelectors = this . getFeatureSetting ( 'excludeSelectors' ) || [ '.ad' , '.sidebar' , '.footer' , '.nav' , '.header' ] ;
422430 const excludedInertElements = this . getFeatureSetting ( 'excludedInertElements' ) || [
431+ 'img' ,
423432 'script' ,
424433 'style' ,
425434 'link' ,
@@ -448,6 +457,7 @@ export default class PageContext extends ContentFeature {
448457 maxDepth,
449458 includeIframes : this . getFeatureSettingEnabled ( 'includeIframes' , 'enabled' ) ,
450459 excludeSelectors : excludeSelectorsString ,
460+ trimBlankLinks : this . getFeatureSettingEnabled ( 'trimBlankLinks' , 'enabled' ) ,
451461 } ) ;
452462 this . log . info ( 'Content markdown' , content , contentRoot ) ;
453463 }
0 commit comments