diff --git a/post-processing/generate-autoconsent-rules/generation.js b/post-processing/generate-autoconsent-rules/generation.js index 3545c16c..f3e49d0e 100644 --- a/post-processing/generate-autoconsent-rules/generation.js +++ b/post-processing/generate-autoconsent-rules/generation.js @@ -248,8 +248,12 @@ function generateRulesForSite(region, initialUrl, finalUrl, collectorResult, mat const reviewNotes = []; let keptCount = 0; - const llmConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.llmMatch); - if (llmConfirmedPopups.length > 1 || llmConfirmedPopups[0].rejectButtons.length > 1) { + // const llmConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.llmMatch); + const regexConfirmedPopups = collectorResult.scrapedFrames + .flatMap((frame) => frame.potentialPopups) + .filter((popup) => popup.regexMatch); + // if (llmConfirmedPopups.length > 1 || llmConfirmedPopups[0].rejectButtons.length > 1) { + if (regexConfirmedPopups.length > 1 || regexConfirmedPopups[0].rejectButtons.length > 1) { console.warn('Multiple cookie popups or reject buttons found in', initialUrl); reviewNotes.push({ needsReview: false, // it's not a problem by itself, unless this leads to multiple _rules_ generated, but we check that separately. @@ -261,7 +265,8 @@ function generateRulesForSite(region, initialUrl, finalUrl, collectorResult, mat // go over all frames, all confirmed popups within them, and all reject buttons inside for (const frame of collectorResult.scrapedFrames) { - for (const popup of frame.potentialPopups.filter((p) => p.llmMatch)) { + // for (const popup of frame.potentialPopups.filter((p) => p.llmMatch)) { + for (const popup of frame.potentialPopups.filter((p) => p.regexMatch)) { for (const button of popup.rejectButtons) { if (ruleForButtonExists(button, matchingRules, newRules, rulesToOverride)) { // if there is an existing rule with the same reject button, do nothing diff --git a/post-processing/generate-autoconsent-rules/main.js b/post-processing/generate-autoconsent-rules/main.js index 62d874e0..ef66e61f 100644 --- a/post-processing/generate-autoconsent-rules/main.js +++ b/post-processing/generate-autoconsent-rules/main.js @@ -148,16 +148,21 @@ async function processCookiePopupsForSite(globalParams, { finalUrl, initialUrl, const updatedExistingRules = structuredClone(existingRules); - const llmConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.llmMatch); + // const llmConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.llmMatch); + const regexConfirmedPopups = collectorResult.scrapedFrames + .flatMap((frame) => frame.potentialPopups) + .filter((popup) => popup.regexMatch); // shortcut if no popups with llmMatch - if (llmConfirmedPopups.length === 0) { + // if (llmConfirmedPopups.length === 0) { + if (regexConfirmedPopups.length === 0) { return { newRuleFiles, updatedRuleFiles, keptCount: 0, reviewNotes: [], updatedExistingRules }; } const matchingRules = findMatchingExistingRules(initialUrl, finalUrl, collectorResult, existingRules); console.log( - `Detected ${llmConfirmedPopups.length} unhandled cookie popup(s) on ${finalUrl} (matched ${matchingRules.length} existing rules)`, + // `Detected ${llmConfirmedPopups.length} unhandled cookie popup(s) on ${finalUrl} (matched ${matchingRules.length} existing rules)`, + `Detected ${regexConfirmedPopups.length} unhandled cookie popup(s) on ${finalUrl} (matched ${matchingRules.length} existing rules)`, ); const { newRules, rulesToOverride, reviewNotes, keptCount } = generateRulesForSite( region, @@ -309,9 +314,12 @@ async function processFiles(globalParams, existingRules) { totalSitesWithPopups++; const matchedRules = collectorResult.cmps.map((cmp) => cmp.name.trim()).filter((name) => name !== ''); - const llmConfirmedPopups = collectorResult.scrapedFrames + // const llmConfirmedPopups = collectorResult.scrapedFrames + // .flatMap((frame) => frame.potentialPopups) + // .filter((popup) => popup.llmMatch); + const regexConfirmedPopups = collectorResult.scrapedFrames .flatMap((frame) => frame.potentialPopups) - .filter((popup) => popup.llmMatch); + .filter((popup) => popup.regexMatch); const screenshot = jsonData.data.screenshots; if (hasKnownCmp(collectorResult.cmps)) { @@ -319,16 +327,20 @@ async function processFiles(globalParams, existingRules) { autoconsentManifest.set(fileName, { siteUrl: jsonData.finalUrl, matchedRules, - llmConfirmedPopups, + // llmConfirmedPopups, + regexConfirmedPopups, screenshot, newlyCreatedRules: [], updatedRules: [], reviewNotes: [], }); } else { - const llmConfirmedPopups = collectorResult.scrapedFrames + // const llmConfirmedPopups = collectorResult.scrapedFrames + // .flatMap((frame) => frame.potentialPopups) + // .filter((popup) => popup.llmMatch); + const regexConfirmedPopups = collectorResult.scrapedFrames .flatMap((frame) => frame.potentialPopups) - .filter((popup) => popup.llmMatch); + .filter((popup) => popup.regexMatch); /** @type {import('./types').AutoconsentManifestFileData[]} */ let newRuleFiles = []; /** @type {import('./types').AutoconsentManifestFileData[]} */ @@ -337,7 +349,8 @@ async function processFiles(globalParams, existingRules) { /** @type {import('./types').ReviewNote[]} */ let reviewNotes = []; - if (llmConfirmedPopups.length > 0) { + // if (llmConfirmedPopups.length > 0) { + if (regexConfirmedPopups.length > 0) { totalUnhandled++; const result = await processCookiePopupsForSite(globalParams, { finalUrl: jsonData.finalUrl, @@ -351,7 +364,8 @@ async function processFiles(globalParams, existingRules) { autoconsentManifest.set(fileName, { siteUrl: jsonData.finalUrl, matchedRules, - llmConfirmedPopups, + // llmConfirmedPopups, + regexConfirmedPopups, screenshot, newlyCreatedRules: newRuleFiles, updatedRules: updatedRuleFiles, diff --git a/post-processing/generate-autoconsent-rules/types.js b/post-processing/generate-autoconsent-rules/types.js index 81256f21..f86ba94a 100644 --- a/post-processing/generate-autoconsent-rules/types.js +++ b/post-processing/generate-autoconsent-rules/types.js @@ -42,7 +42,8 @@ * @typedef {{ * siteUrl: string; * matchedRules: string[]; - * llmConfirmedPopups: PopupData[]; + * _llmConfirmedPopups?: PopupData[]; + * regexConfirmedPopups: PopupData[]; * screenshot: string; * newlyCreatedRules: AutoconsentManifestFileData[]; * updatedRules: AutoconsentManifestFileData[];