Skip to content
This repository was archived by the owner on Nov 15, 2017. It is now read-only.

Commit 234e398

Browse files
committed
support for wildcard + fine tuning performance
1 parent 024113a commit 234e398

File tree

1 file changed

+83
-70
lines changed

1 file changed

+83
-70
lines changed

js/abp-filters.js

Lines changed: 83 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ Net requests (3rd party / all): 906 / 1,690
6363
Bandwidth: 25,440,697 bytes
6464
Idle mem after: 120 MB
6565
66+
67+
Complex filters count no '*' support: 16,637
68+
Complex filters count with '*' support: 18,741
6669
*/
6770

6871
var filterDict = {};
@@ -82,100 +85,105 @@ var reToken = /[%0-9A-Za-z]{2,}/g;
8285
// Base filter record. Stores the filter string `s`, the offset `tokenBeg`
// (presumably the position of the indexing token inside `s` -- the match
// methods subtract it from the token position found in the tested string),
// and a `next` link that matchStringToFilterChain() follows.
// The added side of this diff stops storing `tokenLen` to keep objects small.
var Filter = function(s, tokenBeg, tokenLen) {
8386
this.s = s;
8487
this.tokenBeg = tokenBeg;
85-
this.tokenLen = tokenLen;
88+
// We do not need this at this point, so keep object size to a minimum
89+
// this.tokenLen = tokenLen;
8690
this.next = undefined;
8791
};
8892

8993
// Base matcher. On the removed side it compared `this.s` against `s` at the
// position obtained by subtracting `this.tokenBeg` from `tokenBeg`; on the
// added side it unconditionally returns false.
// NOTE(review): the specialised filter types below assign their own
// `prototype.match` and only borrow the constructor via Filter.apply(), so
// this method is presumably a never-matching fallback -- confirm.
Filter.prototype.match = function(s, tokenBeg) {
90-
// rhill 2014-03-05: Benchmarking shows that's the fastest way to do this.
91-
var filterBeg = tokenBeg - this.tokenBeg;
92-
return s.indexOf(this.s, filterBeg) === filterBeg;
94+
return false;
9395
};
9496

9597
/******************************************************************************/
9698

97-
// Example:
98-
// given: "__/abcde-ghijk*mnopqr"
99-
// token: "abcde"
100-
// this.tokenBeg: 3
101-
// this.tokenLen: 5
102-
// Align origins of global and local strings
103-
// Then loop for each segment
104-
// local l offset = 0
105-
// local r offset = indexOf('*') = 14
106-
// So test local segment "__/abcde-ghijk" against external segment with
107-
// offset transposed into global coords.
108-
// Then repeat for next plain segment, after skipping wildcard. Etc.
109-
110-
// This needs more work, and especially benchmarks against regex.
111-
// My expectation though is that using indexOf() is faster for filters
112-
// which have a single wildcard (large majority), while a regex would work
113-
// for maybe 2 or 3 and more wildcards (both approaches require an overhead).
99+
// Plain filter (no specialised prefix handling): all state is initialised by
// the shared Filter constructor.
//   s        - filter string
//   tokenBeg - offset passed through to Filter
//   tokenLen - passed through to Filter
var FilterPlain = function(s, tokenBeg, tokenLen) {
    Filter.call(this, s, tokenBeg, tokenLen);
};
114102

115-
// I will collate here real cases to use in a jsperf benchmark:
103+
// Returns true when the filter string occurs in `s` exactly at the position
// obtained by shifting `tokenBeg` (token position in `s`) back by the
// filter's own token offset.
FilterPlain.prototype.match = function(s, tokenBeg) {
    var filterBeg = tokenBeg - this.tokenBeg;
    var foundAt = s.indexOf(this.s, filterBeg);
    return foundAt === filterBeg;
};
116107

117-
// Hits:
108+
/******************************************************************************/
118109

119-
// l.yimg.com*/img/badge-
120-
// http://mail.yimg.com/nq/assets/micro2/v47/img/badge-sprites.png
110+
// Plain filter whose token offset is 0 (see FilterPlainFactory), so matching
// needs no offset adjustment. State is initialised by the shared Filter
// constructor.
//   s        - filter string
//   tokenBeg - offset passed through to Filter
// Declared with `var` so it is not an implicit global, which would throw a
// ReferenceError in strict-mode code.
var FilterPlainPrefix0 = function(s, tokenBeg) {
    // Forward every argument actually supplied; the factory also passes tokenLen.
    Filter.apply(this, arguments);
};
121113

122-
// yimg.com/ss/rapid-*.js
123-
// http://l.yimg.com/ss/rapid-3.11.js
114+
// Returns true when the filter string occurs in `s` exactly at `tokenBeg`.
// No offset arithmetic is performed here.
FilterPlainPrefix0.prototype.match = function(s, tokenBeg) {
    var foundAt = s.indexOf(this.s, tokenBeg);
    return foundAt === tokenBeg;
};
124117

125-
// arstechnica.net*/sponsor-
126-
// http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/images/smartstream/ibm/sponsor-msg.png
118+
/******************************************************************************/
127119

128-
// msn.com*/report.js
129-
// http://blu.stj.s-msn.com/br/csl/js/D8CC944FD882D1B64561551E54F9CF3B/report.js
120+
// Plain filter whose token offset is 1 (see FilterPlainFactory), i.e. exactly
// one character precedes the token in the filter string. State is initialised
// by the shared Filter constructor.
//   s        - filter string
//   tokenBeg - offset passed through to Filter
// Declared with `var` so it is not an implicit global, which would throw a
// ReferenceError in strict-mode code.
var FilterPlainPrefix1 = function(s, tokenBeg) {
    // Forward every argument actually supplied; the factory also passes tokenLen.
    Filter.apply(this, arguments);
};
130123

131-
// /cnwk.1d/*/apex.js
132-
// http://cn.cbsimg.net/cnwk.1d/Aud/javascript/gamespot/apex.js?_=1394157449540
124+
// Returns true when the filter string occurs in `s` starting one character
// before `tokenBeg`.
FilterPlainPrefix1.prototype.match = function(s, tokenBeg) {
    var filterBeg = tokenBeg - 1;
    var foundAt = s.indexOf(this.s, filterBeg);
    return foundAt === filterBeg;
};
133127

134-
// Misses:
128+
/******************************************************************************/
135129

136-
// ...
130+
// With a single wildcard, indexOf is best.
131+
// See: http://jsperf.com/regexp-vs-indexof-for-abp/4
137132

138-
var FilterWildcard = function(s, tokenBeg, tokenLen) {
133+
// Filter containing one '*' wildcard. On top of the state set by the shared
// Filter constructor it precomputes:
//   wcOffset - index of the first '*' in `s`
//   lSegment - text before that '*'
//   rSegment - text after that '*'
// so that matching never has to split the filter string again.
// Declared with `var` so it is not an implicit global, which would throw a
// ReferenceError in strict-mode code.
var FilterSingleWildcard = function(s, tokenBeg, tokenLen) {
    Filter.apply(this, arguments);
    this.wcOffset = s.indexOf('*');
    this.lSegment = s.slice(0, this.wcOffset);
    this.rSegment = s.slice(this.wcOffset + 1);
};
141139

142-
FilterWildcard.prototype.match = function(s, tokenBeg) {
143-
var globalLeftOffset = tokenBeg - this.tokenBeg;
144-
var localStr = this.s;
140+
// Matches a single-wildcard filter against `s`.
//   s        - string being tested
//   tokenBeg - position in `s` where the filter's token was found
// Returns true when the left segment sits in `s` exactly where the filter
// would begin and the right segment occurs anywhere after the left segment.
FilterSingleWildcard.prototype.match = function(s, tokenBeg) {
    // Align the filter's origin with `s`: the token sits `this.tokenBeg`
    // characters into the filter, so the filter itself starts that much earlier.
    // (This was previously a bare `tokenBeg - this.tokenBeg;` expression whose
    // result was discarded.)
    var filterBeg = tokenBeg - this.tokenBeg;
    return s.indexOf(this.lSegment, filterBeg) === filterBeg &&
           s.indexOf(this.rSegment, filterBeg + this.wcOffset) !== -1;
};
145145

146-
// First segment must match exactly
147-
var localLeftOffset = 0;
148-
var localRightOffset = localStr.indexOf('*', localLeftOffset);
149-
if ( s.indexOf(localStr.slice(localLeftOffset, localRightOffset), globalLeftOffset) !== globalLeftOffset ) {
150-
return false;
151-
}
152-
globalLeftOffset += localRightOffset;
153-
localLeftOffset = localRightOffset + 1;
154-
155-
var localLen = localStr.length;
156-
while ( localLeftOffset < localLen ) {
157-
localRightOffset = localStr.indexOf('*', localLeftOffset);
158-
if ( localRightOffset < 0 ) {
159-
localRightOffset = localLen;
160-
}
161-
globalLeftOffset = s.indexOf(localStr.slice(localLeftOffset, localRightOffset), globalLeftOffset);
162-
if ( globalLeftOffset < 0 ) {
163-
return false;
164-
}
165-
globalLeftOffset += localRightOffset - localLeftOffset;
166-
localLeftOffset = localRightOffset + 1;
167-
}
168-
return true;
146+
/******************************************************************************/
147+
148+
// With many wildcards, a regex is best.
149+
150+
// Filter containing several '*' wildcards, matched through a regular
// expression compiled once at construction time.
//   s        - filter string
//   tokenBeg - offset passed through to Filter
//   tokenLen - passed through to Filter
// `this.re` is anchored at the start ('^'); regex metacharacters in `s` are
// escaped and each '*' becomes '.*'.
// Declared with `var` so it is not an implicit global, which would throw a
// ReferenceError in strict-mode code.
var FilterManyWildcards = function(s, tokenBeg, tokenLen) {
    Filter.apply(this, arguments);
    // Ref: escaper taken from:
    // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
    // Except modified for the purpose here.
    this.re = new RegExp('^' + s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1').replace(/\*/g, '.*'));
};
157+
158+
// Tests the precompiled regex against the part of `s` that starts where the
// filter would begin (token position minus the filter's own token offset).
FilterManyWildcards.prototype.match = function(s, tokenBeg) {
    var filterBeg = tokenBeg - this.tokenBeg;
    var candidate = s.slice(filterBeg);
    return this.re.test(candidate);
};
170161

171162
/******************************************************************************/
172163

173164
// Entry point for building a filter object from a filter string.
// Removed side: returned a Filter when `s` had no '*', else a FilterWildcard.
// Added side: delegates to FilterWildcardFactory when the first '*' is at an
// index greater than 0, otherwise to FilterPlainFactory.
// NOTE(review): with `wcOffset > 0`, a filter whose first '*' is at index 0
// takes the plain path -- confirm leading wildcards are stripped upstream.
// NOTE(review): assigned without `var`, so this is an implicit global --
// confirm whether that is intentional.
FilterFactory = function(s, tokenBeg, tokenLen) {
174-
var rWildcard = s.indexOf('*');
175-
if ( rWildcard < 0 ) {
176-
return new Filter(s, tokenBeg, tokenLen);
165+
var wcOffset = s.indexOf('*');
166+
if ( wcOffset > 0 ) {
167+
return FilterWildcardFactory(s, tokenBeg, tokenLen);
177168
}
178-
return new FilterWildcard(s, tokenBeg, tokenLen);
169+
return FilterPlainFactory(s, tokenBeg, tokenLen);
170+
};
171+
172+
// Chooses the cheapest plain (wildcard-free) filter implementation for the
// given token offset:
//   tokenBeg === 0 -> FilterPlainPrefix0 (no offset arithmetic when matching)
//   tokenBeg === 1 -> FilterPlainPrefix1 (constant offset of 1)
//   otherwise      -> FilterPlain        (generic offset subtraction)
// Returns the newly constructed filter object.
// Declared with `var` so it is not an implicit global, which would throw a
// ReferenceError in strict-mode code.
var FilterPlainFactory = function(s, tokenBeg, tokenLen) {
    if ( tokenBeg === 0 ) {
        return new FilterPlainPrefix0(s, 0, tokenLen);
    }
    if ( tokenBeg === 1 ) {
        return new FilterPlainPrefix1(s, 1, tokenLen);
    }
    return new FilterPlain(s, tokenBeg, tokenLen);
};
181+
182+
// Chooses the filter implementation for a filter string containing '*':
//   exactly one '*' -> FilterSingleWildcard (indexOf-based matching)
//   more than one   -> FilterManyWildcards  (regex-based matching)
// Returns the newly constructed filter object.
//
// Fixes relative to the previous version:
//  - FilterManyWildcards is now instantiated with `new`; calling it as a plain
//    function returned undefined, so multi-wildcard filters were dropped.
//  - The wildcard count no longer relies on /\*[^*]\*/, which only recognised
//    two wildcards separated by exactly one character and so sent filters
//    such as "a*bc*d" to the single-wildcard implementation.
//  - Declared with `var` so it is not an implicit global.
var FilterWildcardFactory = function(s, tokenBeg, tokenLen) {
    var firstWildcard = s.indexOf('*');
    // A second '*' anywhere after the first means several wildcards.
    if ( s.indexOf('*', firstWildcard + 1) !== -1 ) {
        return new FilterManyWildcards(s, tokenBeg, tokenLen);
    }
    return new FilterSingleWildcard(s, tokenBeg, tokenLen);
};
180188

181189
/******************************************************************************/
@@ -201,6 +209,7 @@ var badTokens = {
201209
'http': true,
202210
'https': true,
203211
'js': true,
212+
'news': true,
204213
'www': true
205214
};
206215

@@ -275,6 +284,9 @@ var add = function(s) {
275284
var tokenEnd = reToken.lastIndex;
276285

277286
filter = FilterFactory(s, tokenBeg, token.length);
287+
if ( !filter ) {
288+
return false;
289+
}
278290
filterDict[s] = filter;
279291

280292
var prefixKey = s.substring(tokenBeg - 1, tokenBeg);
@@ -296,12 +308,13 @@ var freeze = function() {
296308

297309
/******************************************************************************/
298310

299-
// Walks a chain of filters linked through `.next`, starting at the filter
// passed as first argument, and returns true as soon as one filter's
// match(s, tokenBeg) is truthy. Returns false when the chain ends (`undefined`)
// without a match.
// The added side of this diff renames the first parameter from `filter` to
// `f` and adds a commented-out debug log; the traversal is otherwise unchanged.
var matchStringToFilterChain = function(filter, s, tokenBeg) {
300-
while ( filter !== undefined ) {
301-
if ( filter.match(s, tokenBeg) ) {
311+
var matchStringToFilterChain = function(f, s, tokenBeg) {
312+
while ( f !== undefined ) {
313+
if ( f.match(s, tokenBeg) ) {
314+
// console.log('abp-filters.js> matchStringToFilterChain(): "%s" matches "%s"', f.s, s);
302315
return true;
303316
}
304-
filter = filter.next;
317+
f = f.next;
305318
}
306319
return false;
307320
};

0 commit comments

Comments
 (0)