Skip to content

Commit dc9a291

Browse files
authored
Merge pull request #124 from fmacpro/codex/amend-screenshot-logic-for-popups
Capture screenshots after consent dismissal
2 parents (810ab96 + 1a89225), commit dc9a291

File tree

4 files changed

+55
-12
lines changed

4 files changed

+55
-12
lines changed

index.js

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -545,14 +545,6 @@ const articleParser = async function (browser, options, socket) {
545545
try { article.meta.title.text = await page.title() } catch { article.meta.title.text = '' }
546546
}
547547

548-
// Take mobile screenshot
549-
if (options.enabled.includes('screenshot') && timeLeft() > 300) {
550-
log('analyze', 'Capturing screenshot')
551-
try {
552-
article.screenshot = await page.screenshot({ encoding: 'base64', type: 'jpeg', quality: 60 })
553-
} catch { /* ignore screenshot failures (e.g., page closed on timeout) */ }
554-
}
555-
556548
// If the page/browser was closed (e.g., due to global timeout), abort gracefully
557549
try { if (page.isClosed && page.isClosed()) throw new Error('Page closed') } catch {}
558550
if (timeLeft() <= 0) throw new Error('Timeout budget exceeded')
@@ -672,6 +664,14 @@ log('analyze', 'Evaluating meta tags')
672664
}
673665
} catch { /* ignore */ }
674666

667+
// Take mobile screenshot after consent handling
668+
if (options.enabled.includes('screenshot') && timeLeft() > 300) {
669+
log('analyze', 'Capturing screenshot')
670+
try {
671+
article.screenshot = await page.screenshot({ encoding: 'base64', type: 'jpeg', quality: 60 })
672+
} catch { /* ignore screenshot failures (e.g., page closed on timeout) */ }
673+
}
674+
675675
// Save the original HTML of the document (use page.content for robustness)
676676
if (staticHtmlOverride) {
677677
if (staticUrlOverride) article.url = staticUrlOverride

package-lock.json

Lines changed: 6 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "horseman-article-parser",
3-
"version": "1.1.1",
3+
"version": "1.1.2",
44
"description": "Web Page Inspection Tool. Sentiment Analysis, Keyword Extraction, Named Entity Recognition & Spell Check",
55
"type": "module",
66
"main": "index.js",
@@ -59,7 +59,8 @@
5959
"eslint-plugin-json": "^4.0.1",
6060
"eslint-plugin-n": "^17.21.3",
6161
"eslint-plugin-promise": "^7.2.1",
62-
"jsdoc-to-markdown": "^8.0.0"
62+
"jsdoc-to-markdown": "^8.0.0",
63+
"jpeg-js": "^0.4.4"
6364
},
6465
"overrides": {
6566
"puppeteer-extra-plugin-user-data-dir": "file:overrides/puppeteer-extra-plugin-user-data-dir"

tests/parseArticle.test.js

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import fs from 'fs'
44
import http from 'node:http'
55
import puppeteer from 'puppeteer-extra'
66
import { parseArticle } from '../index.js'
7+
import jpeg from 'jpeg-js'
78

89
// Silent socket to suppress parser status logs during tests
910
const quietSocket = { emit: () => {} }
@@ -77,6 +78,43 @@ test('parseArticle captures a screenshot when enabled', { timeout: TEST_TIMEOUT
7778
assert.ok(Buffer.from(article.screenshot, 'base64').length > 1000)
7879
})
7980

81+
test('parseArticle screenshot occurs after consent dismissal', { timeout: TEST_TIMEOUT }, async (t) => {
82+
const html = `<!doctype html><html><head><title>Consent</title></head>
83+
<body style="margin:0">
84+
<div id="overlay" style="position:fixed;top:0;left:0;width:100vw;height:100vh;background:red;display:flex;align-items:center;justify-content:center;">
85+
<button id="accept">accept</button>
86+
</div>
87+
<article style="width:100vw;height:100vh;background:green"></article>
88+
<script>document.getElementById('accept').addEventListener('click',()=>document.getElementById('overlay').remove())</script>
89+
</body></html>`
90+
const server = http.createServer((req, res) => { res.end(html) })
91+
await new Promise(resolve => server.listen(0, resolve))
92+
const { port } = server.address()
93+
const url = `http://127.0.0.1:${port}`
94+
let article
95+
try {
96+
article = await parseArticle({
97+
url,
98+
enabled: ['screenshot'],
99+
timeoutMs: PARSE_TIMEOUT,
100+
contentWaitSelectors: ['article'],
101+
contentWaitTimeoutMs: 1,
102+
skipReadabilityWait: true,
103+
puppeteer: { launch: { headless: true, args: ['--no-sandbox', '--disable-setuid-sandbox'] } }
104+
}, quietSocket)
105+
} catch (err) {
106+
t.skip('puppeteer unavailable: ' + err.message)
107+
server.close()
108+
return
109+
}
110+
server.close()
111+
const buf = Buffer.from(article.screenshot, 'base64')
112+
const { width, height, data } = jpeg.decode(buf)
113+
const mid = ((Math.floor(height / 2) * width) + Math.floor(width / 2)) * 4
114+
const r = data[mid], g = data[mid + 1], b = data[mid + 2]
115+
assert.ok(g > r && g > b, `expected green to dominate, got r=${r} g=${g} b=${b}`)
116+
})
117+
80118
test('parseArticle uses rules overrides for title and content', { timeout: TEST_TIMEOUT }, async (t) => {
81119
const longText = 'Incorrect '.repeat(30)
82120
const html = `<html><head><title>Wrong</title></head><body><article><p>${longText}</p></article></body></html>`

0 commit comments

Comments
 (0)