Skip to content

Commit 1ece390

Browse files
feat: pseudo directives with :remove (#5397)
Co-authored-by: Philipp Claßen <philipp.classen@posteo.de>
1 parent 28f07fe commit 1ece390

File tree

11 files changed

+404
-48
lines changed

11 files changed

+404
-48
lines changed

packages/adblocker-content/src/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import type { AST } from '@ghostery/adblocker-extended-selectors';
1010

11-
const SCRIPT_ID = 'cliqz-adblocker-script';
11+
const SCRIPT_ID = 'ghostery-adblocker-script';
1212
const IGNORED_TAGS = new Set(['br', 'head', 'link', 'meta', 'script', 'style', 's']);
1313

1414
export type Lifecycle = 'start' | 'dom-update';
@@ -27,8 +27,8 @@ export interface IMessageFromBackground {
2727
extended: {
2828
ast: AST;
2929
id: number;
30-
remove: boolean;
3130
attribute?: string | undefined;
31+
directive?: AST | undefined;
3232
}[];
3333
}
3434

packages/adblocker-extended-selectors/src/eval.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,3 +503,46 @@ export function querySelectorAll(element: Element, selector: AST): Element[] {
503503

504504
return [];
505505
}
506+
/**
 * Executes a pseudo-directive on the element.
 *
 * Supported directives:
 * - `:remove()` calls `element.remove()`.
 * - `:remove-attr(arg)` removes the attribute named `arg`; when `arg` is
 *   written as `/.../` it is compiled with `parseRegex` and every attribute
 *   whose name matches is removed.
 * - `:remove-class(arg)` does the same for entries of `element.classList`.
 *
 * Selectors that are not a pseudo-class, unknown directive names and
 * directives without an argument (other than `:remove`) are ignored.
 *
 * @param element The target from normal or extended selector.
 * @param selector The AST only containing pseudo directive.
 */
export function handlePseudoDirective(element: Element, selector: AST): void {
  if (selector.type !== 'pseudo-class') {
    return;
  }

  if (selector.name === 'remove') {
    element.remove();
    return;
  }

  if (selector.name !== 'remove-attr' && selector.name !== 'remove-class') {
    return;
  }

  const { argument } = selector;
  if (argument === undefined) {
    return;
  }

  // A regex argument is wrapped in slashes. Require at least two characters
  // so that a lone "/" is handled as a literal name and not as a regex.
  const isRegex = argument.length > 1 && argument.startsWith('/') && argument.endsWith('/');

  if (selector.name === 'remove-attr') {
    if (isRegex) {
      const regex = parseRegex(argument);
      // `attributes` is a live collection: walk it backwards so a removal
      // never shifts the index of an entry that is still to be visited.
      for (let i = element.attributes.length - 1; i >= 0; i--) {
        const attribute = element.attributes.item(i);
        if (attribute !== null && regex.test(attribute.name)) {
          element.removeAttribute(attribute.name);
        }
      }
    } else {
      element.removeAttribute(stripsWrappingQuotes(argument));
    }
    return;
  }

  // selector.name === 'remove-class'
  if (isRegex) {
    const regex = parseRegex(argument);
    // `classList` is live as well, hence the backwards iteration.
    for (let i = element.classList.length - 1; i >= 0; i--) {
      const className = element.classList.item(i);
      if (className !== null && regex.test(className)) {
        element.classList.remove(className);
      }
    }
    return;
  }

  const className = stripsWrappingQuotes(argument);
  // `DOMTokenList.remove` throws a SyntaxError for an empty token and an
  // InvalidCharacterError for a token containing ASCII whitespace. Such a
  // token can never name a class, so skip it instead of letting a malformed
  // filter throw from here.
  if (className !== '' && !/[\t\n\f\r ]/.test(className)) {
    element.classList.remove(className);
  }
}

packages/adblocker-extended-selectors/src/extended.ts

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
import { tokenize, RECURSIVE_PSEUDO_CLASSES } from './parse.js';
1010

11+
import type { AST, PseudoClass } from './types.js';
12+
1113
export const EXTENDED_PSEUDO_CLASSES = new Set([
1214
// '-abp-contains',
1315
// '-abp-has',
@@ -92,6 +94,10 @@ export const PSEUDO_CLASSES = new Set([
9294
// this reason.
9395
export const PSEUDO_ELEMENTS = new Set(['after', 'before', 'first-letter', 'first-line']);
9496

97+
// Pseudo directives are pseudo-classes that carry an action. They are
98+
// not part of any CSS specification; each one defines a custom action.
99+
export const PSEUDO_DIRECTIVES = new Set(['remove', 'remove-attr', 'remove-class']);
100+
95101
export enum SelectorType {
96102
Normal,
97103
Extended,
@@ -111,9 +117,13 @@ export function classifySelector(selector: string): SelectorType {
111117
for (const token of tokens) {
112118
if (token.type === 'pseudo-class') {
113119
const { name } = token;
114-
if (EXTENDED_PSEUDO_CLASSES.has(name) === true) {
120+
if (EXTENDED_PSEUDO_CLASSES.has(name) === true || PSEUDO_DIRECTIVES.has(name) === true) {
115121
foundSupportedExtendedSelector = true;
116-
} else if (PSEUDO_CLASSES.has(name) === false && PSEUDO_ELEMENTS.has(name) === false) {
122+
} else if (
123+
PSEUDO_CLASSES.has(name) === false &&
124+
PSEUDO_ELEMENTS.has(name) === false
125+
// `PSEUDO_DIRECTIVES.has(name)` is always `false` here.
126+
) {
117127
return SelectorType.Invalid;
118128
}
119129

@@ -148,3 +158,119 @@ export function classifySelector(selector: string): SelectorType {
148158

149159
return SelectorType.Normal;
150160
}
161+
/**
 * Exposes ASTs per purpose. For instance, it distinguishes a directive
 * selector from element selectors.
 *
 * Only a `compound` root whose last node is a pseudo-class listed in
 * `PSEUDO_DIRECTIVES` is split. A bare directive at the root (a selector
 * such as `:remove()` with nothing in front of it) is not handled here;
 * those filters are currently dropped during the parsing phase.
 *
 * @param ast The parsed selector.
 * @returns The "element" AST and the "directive" AST; `directive` is `null`
 * when the selector carries no pseudo-directive, in which case `element` is
 * the input AST unchanged.
 */
export function destructAST(ast: AST): { element: AST; directive: PseudoClass | null } {
  // A 'compound' root means several AST nodes are chained, optionally ending
  // with the pseudo-directive. The parser never nests a compound inside
  // another compound and groups every other selector (such as 'complex')
  // into a single node, so the shape is `some-selectors...:pseudo-class()`.
  if (ast.type === 'compound') {
    const nodes = ast.compound;
    const first = nodes[0];
    // Only the last node can be a pseudo-directive.
    const last = nodes[nodes.length - 1];

    // `first`/`last` are undefined for an empty compound. The parser is not
    // expected to produce one, but guard so that we fall through to the
    // "no directive" result instead of throwing on `last.type`.
    if (
      first !== undefined &&
      last !== undefined &&
      last.type === 'pseudo-class' &&
      PSEUDO_DIRECTIVES.has(last.name)
    ) {
      // Compound selectors are expected to have >=2 elements. With two,
      // e.g. ['a', ':directive'], the element selector is simply 'a'.
      if (nodes.length < 3) {
        return {
          element: first,
          directive: last,
        };
      }

      // With three or more, e.g. ['a', 'b', ':directive'], everything but
      // the directive stays grouped as a compound selector: ['a', 'b'].
      return {
        element: {
          type: 'compound',
          compound: nodes.slice(0, -1),
        },
        directive: last,
      };
    }
  }

  // If there's no pseudo-directive, everything is the element selector.
  return {
    element: ast,
    directive: null,
  };
}
220+
/**
 * Finds the position of a pseudo directive in a complete CSS selector, so
 * the selector can be split into the normal/extended selector part and the
 * pseudo directive part.
 *
 * Directives are not chainable, so only the call closed by the last `)` of
 * the selector is inspected. The selector is scanned backwards from there.
 *
 * @param selector The complete selector, e.g. `div[a="b"]:remove()`.
 * @returns The index of the `:` that starts the pseudo directive, or -1 if
 * the call closed by the last `)` is not a known pseudo directive.
 */
export function indexOfPseudoDirective(selector: string): number {
  const close = selector.lastIndexOf(')');
  if (close === -1) {
    return -1;
  }

  // Step over control and whitespace characters sitting right before `)`.
  let i = close - 1;
  while (i >= 0 && selector.charCodeAt(i) < 33) {
    i--;
  }

  // If the argument ends with a quote, jump over the quoted text so that a
  // `(` inside the quotes is not mistaken for the opening parenthesis. Only
  // the character right before `)` is considered: quotes that appear earlier
  // in the selector (e.g. in `[href="x"]`) must not affect the scan.
  if (i >= 0) {
    const quote = selector.charCodeAt(i);
    if (quote === 39 /* `'` */ || quote === 34 /* '"' */ || quote === 96 /* '`' */) {
      let opening = i - 1;
      while (opening >= 0 && selector.charCodeAt(opening) !== quote) {
        opening--;
      }
      // Without a matching opening quote, treat the quote as a plain
      // character and search for `(` starting right before `)`.
      i = opening >= 0 ? opening - 1 : close - 1;
    }
  }

  // Find the parenthesis opening the argument list.
  while (i >= 0 && selector.charCodeAt(i) !== 40 /* '(' */) {
    i--;
  }
  if (i < 0) {
    return -1;
  }

  // The directive name sits between the closest preceding `:` and the `(`.
  const colon = selector.lastIndexOf(':', i);
  if (colon === -1) {
    return -1;
  }

  return PSEUDO_DIRECTIVES.has(selector.slice(colon + 1, i)) ? colon : -1;
}

packages/adblocker-extended-selectors/src/index.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@
77
*/
88

99
export { parse, tokenize } from './parse.js';
10-
export { querySelectorAll, matches } from './eval.js';
10+
export { querySelectorAll, matches, handlePseudoDirective } from './eval.js';
1111
export * from './types.js';
1212
export {
1313
EXTENDED_PSEUDO_CLASSES,
1414
PSEUDO_CLASSES,
1515
PSEUDO_ELEMENTS,
1616
SelectorType,
1717
classifySelector,
18+
destructAST,
19+
indexOfPseudoDirective,
1820
} from './extended.js';

0 commit comments

Comments
 (0)