From f0e144f314a7b0b8163821961fdd8caaa13fd133 Mon Sep 17 00:00:00 2001 From: Jonathan Kingston Date: Wed, 17 Sep 2025 13:15:26 +0100 Subject: [PATCH 1/5] Delayed content collect --- injected/src/features/page-context.js | 84 +++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/injected/src/features/page-context.js b/injected/src/features/page-context.js index a6f13e77af..28d41590ea 100644 --- a/injected/src/features/page-context.js +++ b/injected/src/features/page-context.js @@ -74,6 +74,8 @@ export default class PageContext extends ContentFeature { mutationObserver = null; lastSentContent = null; listenForUrlChanges = true; + /** @type {ReturnType | null} */ + #delayedRecheckTimer = null; init() { if (!this.shouldActivate()) { @@ -95,7 +97,11 @@ export default class PageContext extends ContentFeature { }); if (this.getFeatureSettingEnabled('subscribeToHashChange', 'enabled')) { window.addEventListener('hashchange', () => { + // Immediate collection this.handleContentCollectionRequest(); + + // Schedule delayed recheck after DOM settles + this.scheduleDelayedRecheck(); }); } if (this.getFeatureSettingEnabled('subscribeToPageShow', 'enabled')) { @@ -145,7 +151,11 @@ export default class PageContext extends ContentFeature { if (!this.shouldActivate()) { return; } + // Immediate collection this.handleContentCollectionRequest(); + + // Schedule delayed recheck after DOM settles + this.scheduleDelayedRecheck(); } setup() { @@ -169,6 +179,17 @@ export default class PageContext extends ContentFeature { this.#cachedContent = undefined; this.#cachedTimestamp = 0; this.stopObserving(); + this.clearTimers(); + } + + /** + * Clear all pending timers + */ + clearTimers() { + if (this.#delayedRecheckTimer) { + clearTimeout(this.#delayedRecheckTimer); + this.#delayedRecheckTimer = null; + } } set cachedContent(content) { @@ -198,6 +219,69 @@ export default class PageContext extends ContentFeature { } } + /** + * Schedule a delayed recheck after navigation events + */ + scheduleDelayedRecheck() { + // Clear any existing delayed recheck + if (this.#delayedRecheckTimer) { + clearTimeout(this.#delayedRecheckTimer); + } + + const delayMs = this.getFeatureSetting('navigationRecheckDelayMs') || 1500; + + this.log.info('Scheduling delayed recheck', { delayMs }); + this.#delayedRecheckTimer = setTimeout(() => { + this.log.info('Performing delayed recheck after navigation'); + + // Store the previous content for comparison + const previousContent = this.cachedContent; + + // Force fresh collection by invalidating cache + this.invalidateCache(); + + // Collect fresh content + const freshContent = this.collectPageContent(); + + // Only send if content has meaningfully changed + if (this.hasContentChanged(previousContent, freshContent)) { + this.log.info('Content changed after navigation delay - sending update'); + this.sendContentResponse(freshContent); + } else { + this.log.info('No significant content change after navigation delay'); + } + + this.#delayedRecheckTimer = null; + }, delayMs); + } + + /** + * Check if content has meaningfully changed + * @param {any} oldContent + * @param {any} newContent + * @returns {boolean} + */ + hasContentChanged(oldContent, newContent) { + if (!oldContent || !newContent) { + return true; + } + + // Compare key content fields + const fieldsToCompare = ['title', 'content', 'headings']; + + for (const field of fieldsToCompare) { + const oldValue = JSON.stringify(oldContent[field] || ''); + const newValue = JSON.stringify(newContent[field] || ''); + + if (oldValue !== newValue) { + this.log.info('Content changed in field', field); + return true; + } + } + + return false; + } + startObserving() { this.log.info('Starting observing', this.mutationObserver, this.#cachedContent); if (this.mutationObserver && this.#cachedContent && !this.isObserving) { From f946c43b42abf7bf47cadf78c2f7d74261af9907 Mon Sep 17 00:00:00 2001 From: Jonathan Kingston Date: Mon, 6 Oct 2025 23:40:17 +0100 Subject: [PATCH 2/5] Check for hidden --- injected/src/features/page-context.js | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/injected/src/features/page-context.js b/injected/src/features/page-context.js index 28d41590ea..6f09e2b2bd 100644 --- a/injected/src/features/page-context.js +++ b/injected/src/features/page-context.js @@ -7,6 +7,47 @@ function collapseWhitespace(str) { return typeof str === 'string' ? str.replace(/\s+/g, ' ') : ''; } +function checkNodeIsVisible(node) { + // Fast path: check if node is connected to document + // if (!node.isConnected) { + // return false; + // } + + try { + const style = window.getComputedStyle(node); + + // Check primary visibility properties + if (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity) === 0) { + return false; + } + /* + // Check if element has zero dimensions + const rect = node.getBoundingClientRect(); + if (rect.width === 0 && rect.height === 0) { + return false; + } + + // Check for common hiding techniques + if (style.position === 'absolute' || style.position === 'fixed') { + const left = parseFloat(style.left); + const top = parseFloat(style.top); + // Elements positioned far off-screen + if (left < -9000 || top < -9000) { + return false; + } + } + + // Check for clipping + if (style.clip && style.clip !== 'auto' && style.clip.includes('rect(0')) { + return false; + } + */ + return true; + } catch (e) { + return false; + } +} + function domToMarkdown(node, maxLength = Infinity) { if (node.nodeType === Node.TEXT_NODE) { return collapseWhitespace(node.textContent); @@ -16,6 +57,9 @@ function domToMarkdown(node, maxLength = Infinity) { } const tag = node.tagName.toLowerCase(); + if (!checkNodeIsVisible(node)) { + return ''; + } // Build children string incrementally to exit early when maxLength is exceeded let children = ''; @@ -384,6 +428,7 @@ export default class PageContext extends ContentFeature { this.log.info('Calling domToMarkdown', clone.innerHTML); content += domToMarkdown(clone, upperLimit); + this.log.info('Content markdown', content, clone, contentRoot); } content = content.trim(); From 4fa0341e0253bcc44e459803b6e98f8727ed53a3 Mon Sep 17 00:00:00 2001 From: Jonathan Kingston Date: Wed, 8 Oct 2025 15:08:10 +0100 Subject: [PATCH 3/5] Fix up clearing timers for delayed content collection --- injected/src/features/page-context.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/injected/src/features/page-context.js b/injected/src/features/page-context.js index 6f09e2b2bd..cbe207bb00 100644 --- a/injected/src/features/page-context.js +++ b/injected/src/features/page-context.js @@ -138,6 +138,7 @@ export default class PageContext extends ContentFeature { } window.addEventListener('load', () => { this.handleContentCollectionRequest(); + this.scheduleDelayedRecheck(); }); if (this.getFeatureSettingEnabled('subscribeToHashChange', 'enabled')) { window.addEventListener('hashchange', () => { @@ -223,7 +224,6 @@ export default class PageContext extends ContentFeature { this.#cachedContent = undefined; this.#cachedTimestamp = 0; this.stopObserving(); - this.clearTimers(); } /** @@ -281,8 +281,9 @@ export default class PageContext extends ContentFeature { // Store the previous content for comparison const previousContent = this.cachedContent; - // Force fresh collection by invalidating cache + // Invalidate existing cache this.invalidateCache(); + this.clearTimers(); // Collect fresh content const freshContent = this.collectPageContent(); From bfe80f80129a6e148b2f1fbd1f8c26c43fa13876 Mon Sep 17 00:00:00 2001 From: Jonathan Kingston Date: Wed, 8 Oct 2025 15:11:24 +0100 Subject: [PATCH 4/5] Clean up comments --- injected/src/features/page-context.js | 28 ++------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/injected/src/features/page-context.js b/injected/src/features/page-context.js index cbe207bb00..f744573afd 100644 --- a/injected/src/features/page-context.js +++ b/injected/src/features/page-context.js @@ -8,10 +8,8 @@ function collapseWhitespace(str) { } function checkNodeIsVisible(node) { - // Fast path: check if node is connected to document - // if (!node.isConnected) { - // return false; - // } + // Note that we're not checking if the node is connected to the document + // we are cloning the node so it's never connected. try { const style = window.getComputedStyle(node); @@ -20,28 +18,6 @@ function checkNodeIsVisible(node) { if (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity) === 0) { return false; } - /* - // Check if element has zero dimensions - const rect = node.getBoundingClientRect(); - if (rect.width === 0 && rect.height === 0) { - return false; - } - - // Check for common hiding techniques - if (style.position === 'absolute' || style.position === 'fixed') { - const left = parseFloat(style.left); - const top = parseFloat(style.top); - // Elements positioned far off-screen - if (left < -9000 || top < -9000) { - return false; - } - } - - // Check for clipping - if (style.clip && style.clip !== 'auto' && style.clip.includes('rect(0')) { - return false; - } - */ return true; } catch (e) { return false; From 1da3fe7da82ec06efe2d4e2e6b3cca370c06e58d Mon Sep 17 00:00:00 2001 From: Jonathan Kingston Date: Wed, 8 Oct 2025 15:16:00 +0100 Subject: [PATCH 5/5] Remove meaningful content check --- injected/src/features/page-context.js | 37 +-------------------------- 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/injected/src/features/page-context.js b/injected/src/features/page-context.js index f744573afd..d9def12a25 100644 --- a/injected/src/features/page-context.js +++ b/injected/src/features/page-context.js @@ -254,9 +254,6 @@ export default class PageContext extends ContentFeature { this.#delayedRecheckTimer = setTimeout(() => { this.log.info('Performing delayed recheck after navigation'); - // Store the previous content for comparison - const previousContent = this.cachedContent; - // Invalidate existing cache this.invalidateCache(); this.clearTimers(); @@ -265,44 +262,12 @@ export default class PageContext extends ContentFeature { const freshContent = this.collectPageContent(); // Only send if content has meaningfully changed - if (this.hasContentChanged(previousContent, freshContent)) { - this.log.info('Content changed after navigation delay - sending update'); - this.sendContentResponse(freshContent); - } else { - this.log.info('No significant content change after navigation delay'); - } + this.sendContentResponse(freshContent); this.#delayedRecheckTimer = null; }, delayMs); } - /** - * Check if content has meaningfully changed - * @param {any} oldContent - * @param {any} newContent - * @returns {boolean} - */ - hasContentChanged(oldContent, newContent) { - if (!oldContent || !newContent) { - return true; - } - - // Compare key content fields - const fieldsToCompare = ['title', 'content', 'headings']; - - for (const field of fieldsToCompare) { - const oldValue = JSON.stringify(oldContent[field] || ''); - const newValue = JSON.stringify(newContent[field] || ''); - - if (oldValue !== newValue) { - this.log.info('Content changed in field', field); - return true; - } - } - - return false; - } - startObserving() { this.log.info('Starting observing', this.mutationObserver, this.#cachedContent); if (this.mutationObserver && this.#cachedContent && !this.isObserving) {