Skip to content

Commit 73faae4

Browse files
authored
Expand CPM generated rules to more languages (#154)
* Add some button patterns for other languages
* Clean button text before matching
* Make sure button text is always a string
* Do not match empty button texts
* Handle parsing errors
* Fix crawler rate reporting: avoid collision with built-in :rate
* More informative progress bar
* Update German button patterns
* Lint
* Update Italian button patterns
* Update Spanish button patterns
* Update Brazilian Portuguese button patterns
* Update Swedish button patterns
1 parent 855d483 commit 73faae4

File tree

5 files changed

+667
-42
lines changed

5 files changed

+667
-42
lines changed

collectors/CookiePopups/scrapeScript.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ function getButtonData(el) {
312312
.filter(b => isVisible(b) && !isDisabled(b) && b.innerText.trim());
313313

314314
return actionableButtons.map(b => ({
315-
text: b.innerText,
315+
text: b.innerText ?? b.textContent ?? '',
316316
selector: getUniqueSelector(b),
317317
}));
318318
}

post-processing/detect-cookie-popups.js

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ async function main() {
169169
});
170170

171171
const pages = fs.readdirSync(crawlDir).filter(name => name.endsWith('.json') && name !== 'metadata.json');
172-
const progressBar = process.env.IS_CI ? null : new ProgressBar('[:bar] :percent ETA :etas :page', {
172+
const progressBar = process.env.IS_CI ? null : new ProgressBar('[:bar] :current/:total :percent ETA :etas rate :rate/s :page', {
173173
complete: chalk.green('='),
174174
incomplete: ' ',
175175
total: pages.length,
@@ -189,8 +189,17 @@ async function main() {
189189
console.log(`${index + 1}/${pages.length} : ${page}`);
190190
}
191191
const filePath = path.join(crawlDir, page);
192-
const contents = await fs.promises.readFile(filePath, 'utf-8');
193-
const data = JSON.parse(contents.toString());
192+
193+
let contents;
194+
let data;
195+
try {
196+
contents = await fs.promises.readFile(filePath, 'utf-8');
197+
data = JSON.parse(contents.toString());
198+
} catch (error) {
199+
console.error(`Error reading or parsing file ${page}:`, error.message);
200+
progressBar?.tick({ page });
201+
return;
202+
}
194203

195204
if (!data.data || !data.data.cookiepopups) {
196205
progressBar?.tick({ page });

0 commit comments

Comments
 (0)