Skip to content
This repository was archived by the owner on Nov 15, 2017. It is now read-only.

Commit f476660

Browse files
committed
removed overcomplication of abp filters code + fixed potential false negatives
1 parent 3caca0a commit f476660

File tree

2 files changed

+43
-148
lines changed

2 files changed

+43
-148
lines changed

js/abp-filters.js

Lines changed: 41 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ Idle mem after: 120 MB
6565
6666
*/
6767

68-
var runtimeId = 1;
69-
7068
var filterDict = {};
7169
var filterDictFrozenCount = 0;
7270
var filterIndex = {};
@@ -82,7 +80,6 @@ var reToken = /[%0-9A-Za-z]{2,}/g;
8280
/******************************************************************************/
8381

8482
var FilterEntry = function(token) {
85-
this.id = runtimeId++;
8683
this.token = token;
8784
this.prefix = '';
8885
this.suffix = '';
@@ -101,7 +98,6 @@ FilterEntry.prototype.matchString = function(s, tokenBeg, tokenEnd) {
10198
// Reset all, thus reducing the memory footprint of the context to a minimum.
10299

103100
var reset = function() {
104-
runtimeId = 1;
105101
filterDict = {};
106102
filterDictFrozenCount = 0;
107103
filterIndex = {};
@@ -110,9 +106,10 @@ var reset = function() {
110106
/******************************************************************************/
111107

112108
// Given a string, find a good token. Tokens which are too generic, i.e. very
113-
// common while likely to be false positives, are not good, if possible.
114-
// These are collated manually. This has a *significant* positive impact on
109+
// common with a high probability of ending up as a false positive, are not
110+
// good. Avoid if possible. This has a *significant* positive impact on
115111
// performance.
112+
// These "bad tokens" are collated manually.
116113

117114
var badTokens = {
118115
'com': true,
@@ -207,136 +204,34 @@ var add = function(s) {
207204
var suffixKey = suffix.length > 0 ? suffix.charAt(0) : '0';
208205

209206
var fidx = filterIndex;
210-
if ( fidx[token] === undefined ) {
211-
fidx[token] = {};
212-
}
213-
var listkey = prefixKey + suffixKey;
214-
if ( fidx[token][listkey] === undefined ) {
215-
fidx[token][listkey] = [filter.id];
207+
var tokenKey = prefixKey + token + suffixKey;
208+
var tokenEntry = fidx[tokenKey];
209+
if ( tokenEntry === undefined ) {
210+
fidx[tokenKey] = filter;
211+
} else if ( tokenEntry instanceof FilterEntry ) {
212+
fidx[tokenKey] = [tokenEntry, filter];
216213
} else {
217-
fidx[token][listkey].push(filter.id);
214+
tokenEntry.push(filter);
218215
}
219216

220217
return true;
221218
};
222219

223-
224-
/******************************************************************************/
225-
226-
var mergeSubdict = function(token) {
227-
var tokenEntry = filterIndex[token];
228-
if ( tokenEntry === undefined ) {
229-
return;
230-
}
231-
var list = [];
232-
var value;
233-
for ( var key in tokenEntry ) {
234-
if ( !tokenEntry.hasOwnProperty(key) ) {
235-
continue;
236-
}
237-
value = tokenEntry[key];
238-
if ( typeof value === 'number' ) {
239-
list.push(value);
240-
} else {
241-
list = list.concat(value);
242-
}
243-
}
244-
filterIndex[token] = list.join(' ');
245-
};
246-
247220
/******************************************************************************/
248221

249222
var freeze = function() {
250-
// TODO: find out if the JS engine translates the stringified id into
251-
// a number internally. I would think not, but if so, then there might
252-
// be a performance hit. The JS array results in a smaller memory
253-
// footprint... Need to evaluate the optimal representation.
254-
var farr = [];
255-
var fdict = filterDict;
256-
257-
var f;
258-
for ( var s in fdict ) {
259-
if ( !fdict.hasOwnProperty(s) ) {
260-
continue;
261-
}
262-
f = fdict[s];
263-
farr[f.id] = f;
264-
}
265-
filterDict = farr;
266-
267-
var tokenEntry;
268-
var key, value;
269-
var lastKey;
270-
var kCount, vCount, vCountTotal;
271-
var tokenCountMax, kCountMax, vCountMax = 0;
272-
for ( var token in filterIndex ) {
273-
if ( !filterIndex.hasOwnProperty(token) ) {
274-
continue;
275-
}
276-
tokenEntry = filterIndex[token];
277-
kCount = vCount = vCountTotal = 0;
278-
for ( key in tokenEntry ) {
279-
if ( !tokenEntry.hasOwnProperty(key) ) {
280-
continue;
281-
}
282-
// No need to mutate to a string if there is only one
283-
// element in the array.
284-
lastKey = key;
285-
value = tokenEntry[key];
286-
kCount += 1;
287-
vCount = value.length;
288-
vCountTotal += vCount;
289-
if ( vCount < 2 ) {
290-
tokenEntry[key] = value[0];
291-
} else {
292-
tokenEntry[key] = value.join(' ');
293-
}
294-
if ( vCount > vCountMax ) {
295-
tokenCountMax = token;
296-
kCountMax = key;
297-
vCountMax = vCount;
298-
}
299-
}
300-
// Merge all sub-dicts into a single one at token dict level, if there
301-
// are not enough keys or values to justify the overhead.
302-
// Also, no need for a sub-dict if there is only one key.
303-
if ( kCount < 2 ) {
304-
filterIndex[token] = tokenEntry[lastKey];
305-
continue;
306-
}
307-
if ( vCountTotal < 4 ) {
308-
mergeSubdict(token);
309-
continue;
310-
}
311-
}
312-
313-
filterDictFrozenCount = farr.length;
314-
315-
// console.log('Dict stats:');
316-
// console.log('\tToken count:', Object.keys(filterIndex).length);
317-
// console.log('\tLargest list: "%s %s" has %d ids', tokenCountMax, kCountMax, vCountMax);
223+
filterDictFrozenCount = Object.keys(filterDict).length;
224+
filterDict = null;
318225
};
319226

320227
/******************************************************************************/
321228

322-
var matchFromFilterIndex = function(s, tokenBeg, tokenEnd, index) {
323-
return filterDict[index].matchString(s, tokenBeg, tokenEnd);
324-
};
325-
326-
/******************************************************************************/
327-
328-
var matchFromFilterIndices = function(s, tokenBeg, tokenEnd, indices) {
329-
var indicesEnd = indices.length;
330-
var indexBeg = 0, indexEnd;
331-
while ( indexBeg < indicesEnd ) {
332-
indexEnd = indices.indexOf(' ', indexBeg);
333-
if ( indexEnd < 0 ) {
334-
indexEnd = indicesEnd;
335-
}
336-
if ( filterDict[indices.slice(indexBeg, indexEnd)].matchString(s, tokenBeg, tokenEnd) ) {
229+
var matchFromFilterArray = function(s, tokenBeg, tokenEnd, filters) {
230+
var i = filters.length;
231+
while ( i-- ) {
232+
if ( filters[i].matchString(s, tokenBeg, tokenEnd) ) {
337233
return true;
338234
}
339-
indexBeg = indexEnd + 1;
340235
}
341236
return false;
342237
};
@@ -347,54 +242,54 @@ var matchFromSomething = function(s, tokenBeg, tokenEnd, something) {
347242
if ( something === undefined ) {
348243
return false;
349244
}
350-
if ( typeof something === 'number') {
351-
return filterDict[something].matchString(s, tokenBeg, tokenEnd);
352-
}
353-
if ( typeof something === 'string') {
354-
return matchFromFilterIndices(s, tokenBeg, tokenEnd, something);
355-
}
356245
if ( something instanceof FilterEntry ) {
357246
return something.matchString(s, tokenBeg, tokenEnd);
358247
}
359-
return false;
248+
return matchFromFilterArray(s, tokenBeg, tokenEnd, something);
360249
};
361250

362251
/******************************************************************************/
363252

364253
var matchString = function(s) {
365-
if ( filterDictFrozenCount === 0 ) {
366-
return false;
367-
}
368-
254+
var sLen = s.length;
369255
var matches;
370-
var token, tokenEntry;
256+
var token;
371257
var tokenBeg, tokenEnd;
372258
var prefixKey, suffixKey;
259+
var fidx = filterIndex;
373260

374261
reToken.lastIndex = 0;
375262
while ( matches = reToken.exec(s) ) {
376263
token = matches[0];
377-
tokenEntry = filterIndex[token];
378-
if ( tokenEntry === undefined ) {
379-
continue;
380-
}
381264
tokenBeg = matches.index;
382265
tokenEnd = reToken.lastIndex;
383-
if ( typeof tokenEntry !== 'object' ) {
384-
if ( matchFromSomething(s, tokenBeg, tokenEnd, tokenEntry) ) {
266+
prefixKey = tokenBeg > 0 ? s.charAt(matches.index-1) : '0';
267+
suffixKey = tokenEnd < s.length ? s.charAt(tokenEnd) : '0';
268+
269+
if ( tokenBeg > 0 && tokenEnd < sLen ) {
270+
if ( matchFromSomething(s, tokenBeg, tokenEnd, fidx[prefixKey + token + suffixKey]) ||
271+
matchFromSomething(s, tokenBeg, tokenEnd, fidx[prefixKey + token + '0']) ||
272+
matchFromSomething(s, tokenBeg, tokenEnd, fidx['0' + token + suffixKey]) ||
273+
matchFromSomething(s, tokenBeg, tokenEnd, fidx['0' + token + '0']) ) {
385274
return true;
386275
}
387276
continue;
388277
}
389-
prefixKey = tokenBeg > 0 ? s.charAt(matches.index-1) : '0';
390-
suffixKey = tokenEnd < s.length ? s.charAt(tokenEnd) : '0';
391-
if ( matchFromSomething(s, tokenBeg, tokenEnd, tokenEntry[prefixKey + suffixKey]) ) {
392-
return true;
278+
if ( tokenBeg > 0 ) {
279+
if ( matchFromSomething(s, tokenBeg, tokenEnd, fidx[prefixKey + token + '0']) ||
280+
matchFromSomething(s, tokenBeg, tokenEnd, fidx['0' + token + '0']) ) {
281+
return true;
282+
}
283+
continue;
393284
}
394-
if ( matchFromSomething(s, tokenBeg, tokenEnd, tokenEntry[prefixKey + '0']) ) {
395-
return true;
285+
if ( tokenEnd < sLen ) {
286+
if ( matchFromSomething(s, tokenBeg, tokenEnd, fidx['0' + token + suffixKey]) ||
287+
matchFromSomething(s, tokenBeg, tokenEnd, fidx['0' + token + '0']) ) {
288+
return true;
289+
}
290+
continue;
396291
}
397-
if ( matchFromSomething(s, tokenBeg, tokenEnd, tokenEntry['0' + suffixKey]) ) {
292+
if ( matchFromSomething(s, tokenBeg, tokenEnd, fidx['0' + token + '0']) ) {
398293
return true;
399294
}
400295
}

js/traffic.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,8 @@ function onBeforeChromeExtensionRequestHandler(details) {
150150
function onBeforeRequestHandler(details) {
151151
// console.debug('onBeforeRequestHandler()> "%s": %o', details.url, details);
152152

153-
// rhill 2014-02-17: Ignore 'filesystem:chrome-extension://': this can
154-
// happen when listening to 'chrome-extension://'.
153+
// rhill 2014-02-17: Ignore 'filesystem:': this can happen when listening
154+
// to 'chrome-extension://'.
155155
var requestURL = details.url;
156156
if ( requestURL.indexOf('filesystem:') === 0 ) {
157157
return;

0 commit comments

Comments
 (0)