Skip to content

Commit 8562d64

Browse files
Trim links and include images
1 parent 376142c commit 8562d64

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

injected/src/features/page-context.js

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ function domToMarkdownChildren(childNodes, settings, depth = 0) {
7878
* @property {number} maxDepth - Maximum depth to traverse
7979
* @property {string} excludeSelectors - CSS selectors to exclude from processing
8080
* @property {boolean} includeIframes - Whether to include iframe content
81+
* @property {boolean} trimBlankLinks - Whether to omit links whose content is empty after whitespace is collapsed and trimmed
8182
*/
8283

8384
/**
@@ -127,12 +128,15 @@ function domToMarkdown(node, settings, depth = 0) {
127128
return `${children}\n`;
128129
case 'br':
129130
return `\n`;
131+
case 'img':
132+
return `\n![${collapseAndTrim(children)}](${node.getAttribute('src')})\n`;
130133
case 'ul':
134+
case 'ol':
131135
return `\n${children}\n`;
132136
case 'li':
133-
return `\n- ${children.trim()}\n`;
137+
return `\n- ${collapseAndTrim(children)}\n`;
134138
case 'a':
135-
return getLinkText(node);
139+
return getLinkText(node, children, settings);
136140
case 'iframe': {
137141
if (!settings.includeIframes) {
138142
return children;
@@ -155,9 +159,13 @@ function collapseAndTrim(str) {
155159
return collapseWhitespace(str).trim();
156160
}
157161

158-
function getLinkText(node) {
162+
function getLinkText(node, children, settings) {
159163
const href = node.getAttribute('href');
160-
return href ? `[${collapseAndTrim(node.textContent)}](${href})` : collapseWhitespace(node.textContent);
164+
const trimmedContent = collapseAndTrim(children);
165+
if (settings.trimBlankLinks && trimmedContent.length === 0) {
166+
return '';
167+
}
168+
return href ? `[${trimmedContent}](${href})` : collapseWhitespace(children);
161169
}
162170

163171
export default class PageContext extends ContentFeature {
@@ -420,6 +428,7 @@ export default class PageContext extends ContentFeature {
420428
const maxDepth = this.getFeatureSetting('maxDepth') || 5000;
421429
let excludeSelectors = this.getFeatureSetting('excludeSelectors') || ['.ad', '.sidebar', '.footer', '.nav', '.header'];
422430
const excludedInertElements = this.getFeatureSetting('excludedInertElements') || [
431+
'img',
423432
'script',
424433
'style',
425434
'link',
@@ -448,6 +457,7 @@ export default class PageContext extends ContentFeature {
448457
maxDepth,
449458
includeIframes: this.getFeatureSettingEnabled('includeIframes', 'enabled'),
450459
excludeSelectors: excludeSelectorsString,
460+
trimBlankLinks: this.getFeatureSettingEnabled('trimBlankLinks', 'enabled'),
451461
});
452462
this.log.info('Content markdown', content, contentRoot);
453463
}

0 commit comments

Comments (0)