Skip to content

Commit cbf1fd3

Browse files
committed
Try to use regex heuristics instead of an LLM for generating one-click rules
1 parent 69fd7a5 commit cbf1fd3

File tree

3 files changed

+30
-14
lines changed

3 files changed

+30
-14
lines changed

post-processing/generate-autoconsent-rules/generation.js

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,10 @@ function generateRulesForSite(region, initialUrl, finalUrl, collectorResult, mat
248248
const reviewNotes = [];
249249
let keptCount = 0;
250250

251-
const llmConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.llmMatch);
252-
if (llmConfirmedPopups.length > 1 || llmConfirmedPopups[0].rejectButtons.length > 1) {
251+
// const llmConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.llmMatch);
252+
const regexConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.regexMatch);
253+
// if (llmConfirmedPopups.length > 1 || llmConfirmedPopups[0].rejectButtons.length > 1) {
254+
if (regexConfirmedPopups.length > 1 || regexConfirmedPopups[0].rejectButtons.length > 1) {
253255
console.warn('Multiple cookie popups or reject buttons found in', initialUrl);
254256
reviewNotes.push({
255257
needsReview: false, // it's not a problem by itself, unless this leads to multiple _rules_ generated, but we check that separately.
@@ -261,7 +263,8 @@ function generateRulesForSite(region, initialUrl, finalUrl, collectorResult, mat
261263

262264
// go over all frames, all confirmed popups within them, and all reject buttons inside
263265
for (const frame of collectorResult.scrapedFrames) {
264-
for (const popup of frame.potentialPopups.filter((p) => p.llmMatch)) {
266+
// for (const popup of frame.potentialPopups.filter((p) => p.llmMatch)) {
267+
for (const popup of frame.potentialPopups.filter((p) => p.regexMatch)) {
265268
for (const button of popup.rejectButtons) {
266269
if (ruleForButtonExists(button, matchingRules, newRules, rulesToOverride)) {
267270
// if there is an existing rule with the same reject button, do nothing

post-processing/generate-autoconsent-rules/main.js

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -148,16 +148,19 @@ async function processCookiePopupsForSite(globalParams, { finalUrl, initialUrl,
148148

149149
const updatedExistingRules = structuredClone(existingRules);
150150

151-
const llmConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.llmMatch);
151+
// const llmConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.llmMatch);
152+
const regexConfirmedPopups = collectorResult.scrapedFrames.flatMap((frame) => frame.potentialPopups).filter((popup) => popup.regexMatch);
152153

153154
// shortcut if no popups with regexMatch
154-
if (llmConfirmedPopups.length === 0) {
155+
// if (llmConfirmedPopups.length === 0) {
156+
if (regexConfirmedPopups.length === 0) {
155157
return { newRuleFiles, updatedRuleFiles, keptCount: 0, reviewNotes: [], updatedExistingRules };
156158
}
157159

158160
const matchingRules = findMatchingExistingRules(initialUrl, finalUrl, collectorResult, existingRules);
159161
console.log(
160-
`Detected ${llmConfirmedPopups.length} unhandled cookie popup(s) on ${finalUrl} (matched ${matchingRules.length} existing rules)`,
162+
// `Detected ${llmConfirmedPopups.length} unhandled cookie popup(s) on ${finalUrl} (matched ${matchingRules.length} existing rules)`,
163+
`Detected ${regexConfirmedPopups.length} unhandled cookie popup(s) on ${finalUrl} (matched ${matchingRules.length} existing rules)`,
161164
);
162165
const { newRules, rulesToOverride, reviewNotes, keptCount } = generateRulesForSite(
163166
region,
@@ -309,26 +312,33 @@ async function processFiles(globalParams, existingRules) {
309312
totalSitesWithPopups++;
310313

311314
const matchedRules = collectorResult.cmps.map((cmp) => cmp.name.trim()).filter((name) => name !== '');
312-
const llmConfirmedPopups = collectorResult.scrapedFrames
315+
// const llmConfirmedPopups = collectorResult.scrapedFrames
316+
// .flatMap((frame) => frame.potentialPopups)
317+
// .filter((popup) => popup.llmMatch);
318+
const regexConfirmedPopups = collectorResult.scrapedFrames
313319
.flatMap((frame) => frame.potentialPopups)
314-
.filter((popup) => popup.llmMatch);
320+
.filter((popup) => popup.regexMatch);
315321
const screenshot = jsonData.data.screenshots;
316322

317323
if (hasKnownCmp(collectorResult.cmps)) {
318324
totalSitesWithKnownCmps++;
319325
autoconsentManifest.set(fileName, {
320326
siteUrl: jsonData.finalUrl,
321327
matchedRules,
322-
llmConfirmedPopups,
328+
// llmConfirmedPopups,
329+
regexConfirmedPopups,
323330
screenshot,
324331
newlyCreatedRules: [],
325332
updatedRules: [],
326333
reviewNotes: [],
327334
});
328335
} else {
329-
const llmConfirmedPopups = collectorResult.scrapedFrames
336+
// const llmConfirmedPopups = collectorResult.scrapedFrames
337+
// .flatMap((frame) => frame.potentialPopups)
338+
// .filter((popup) => popup.llmMatch);
339+
const regexConfirmedPopups = collectorResult.scrapedFrames
330340
.flatMap((frame) => frame.potentialPopups)
331-
.filter((popup) => popup.llmMatch);
341+
.filter((popup) => popup.regexMatch);
332342
/** @type {import('./types').AutoconsentManifestFileData[]} */
333343
let newRuleFiles = [];
334344
/** @type {import('./types').AutoconsentManifestFileData[]} */
@@ -337,7 +347,8 @@ async function processFiles(globalParams, existingRules) {
337347
/** @type {import('./types').ReviewNote[]} */
338348
let reviewNotes = [];
339349

340-
if (llmConfirmedPopups.length > 0) {
350+
// if (llmConfirmedPopups.length > 0) {
351+
if (regexConfirmedPopups.length > 0) {
341352
totalUnhandled++;
342353
const result = await processCookiePopupsForSite(globalParams, {
343354
finalUrl: jsonData.finalUrl,
@@ -351,7 +362,8 @@ async function processFiles(globalParams, existingRules) {
351362
autoconsentManifest.set(fileName, {
352363
siteUrl: jsonData.finalUrl,
353364
matchedRules,
354-
llmConfirmedPopups,
365+
// llmConfirmedPopups,
366+
regexConfirmedPopups,
355367
screenshot,
356368
newlyCreatedRules: newRuleFiles,
357369
updatedRules: updatedRuleFiles,

post-processing/generate-autoconsent-rules/types.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@
4242
* @typedef {{
4343
* siteUrl: string;
4444
* matchedRules: string[];
45-
* llmConfirmedPopups: PopupData[];
45+
* _llmConfirmedPopups?: PopupData[];
46+
* regexConfirmedPopups: PopupData[];
4647
* screenshot: string;
4748
* newlyCreatedRules: AutoconsentManifestFileData[];
4849
* updatedRules: AutoconsentManifestFileData[];

0 commit comments

Comments
 (0)