@@ -63,6 +63,9 @@ Net requests (3rd party / all): 906 / 1,690
6363Bandwidth: 25,440,697 bytes
6464Idle mem after: 120 MB
6565
66+
Complex filter count without '*' support: 16,637
Complex filter count with '*' support: 18,741
6669*/
6770
6871var filterDict = { } ;
@@ -82,100 +85,105 @@ var reToken = /[%0-9A-Za-z]{2,}/g;
// Base filter record shared by every concrete filter type.
//   s        : the filter text
//   tokenBeg : offset of the filter's token inside `s`
//   tokenLen : accepted so all filter constructors share one signature, but
//              intentionally not stored, to keep each instance small
// `next` starts out unset; it is the link followed when walking a chain of
// filters.
var Filter = function(s, tokenBeg, tokenLen) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    this.next = undefined;
};

// Default matcher: the base filter matches nothing. Concrete filter types
// supply their own match().
Filter.prototype.match = function(s, tokenBeg) {
    return false;
};
9496
9597/******************************************************************************/
9698
97- // Example:
98- // given: "__/abcde-ghijk*mnopqr"
99- // token: "abcde"
100- // this.tokenBeg: 3
101- // this.tokenLen: 5
102- // Align origins of global and local strings
103- // Then loop for each segment
104- // local l offset = 0
105- // local r offset = indexOf('*') = 14
106- // So test local segment "__/abcde-ghijk" against external segment with
107- // offset transposed into global coords.
108- // Then repeat for next plain segment, after skipping wildcard. Etc.
109-
110- // This needs more work, and especially benchmarks against regex.
111- // My expectation though is that using indexOf() is faster for filters
112- // which have a single wildcard (large majority), while a regex would work
113- // for maybe 2 or 3 and more wildcards (both approaches require an overhead).
// Plain (wildcard-free) filter whose token may sit at any offset inside the
// filter text.
//   s        : the filter text
//   tokenBeg : offset of the token inside `s`
//   tokenLen : forwarded to the base constructor
var FilterPlain = function(s, tokenBeg, tokenLen) {
    Filter.apply(this, arguments);
};

// Returns true when the filter text occurs in `s` at the position implied by
// aligning the token found at `tokenBeg` in `s` with the filter's own token.
FilterPlain.prototype.match = function(s, tokenBeg) {
    var filterBeg = tokenBeg - this.tokenBeg;
    // The filter would have to start before the beginning of `s`. Without
    // this guard an offset of -1 compares equal to indexOf()'s "not found"
    // result and reports a match that does not exist.
    if (filterBeg < 0) {
        return false;
    }
    return s.indexOf(this.s, filterBeg) === filterBeg;
};
116107
117- // Hits:
108+ /******************************************************************************/
118109
119- // l.yimg.com*/img/badge-
120- // http://mail.yimg.com/nq/assets/micro2/v47/img/badge-sprites.png
// Plain filter specialised for a token located at offset 0 of the filter
// text (this is the type FilterPlainFactory picks when tokenBeg === 0), so no
// offset arithmetic is needed when matching.
// Declared with `var`: a bare assignment creates an implicit global and
// throws a ReferenceError in strict mode.
var FilterPlainPrefix0 = function(s, tokenBeg) {
    Filter.apply(this, arguments);
};

// Returns true when the filter text occurs in `s` exactly at `tokenBeg`.
FilterPlainPrefix0.prototype.match = function(s, tokenBeg) {
    return s.indexOf(this.s, tokenBeg) === tokenBeg;
};
124117
125- // arstechnica.net*/sponsor-
126- // http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/images/smartstream/ibm/sponsor-msg.png
118+ /******************************************************************************/
127119
128- // msn.com*/report.js
129- // http://blu.stj.s-msn.com/br/csl/js/D8CC944FD882D1B64561551E54F9CF3B/report.js
// Plain filter specialised for a token located at offset 1 of the filter
// text (this is the type FilterPlainFactory picks when tokenBeg === 1): the
// filter starts exactly one character before the token.
// Declared with `var`: a bare assignment creates an implicit global and
// throws a ReferenceError in strict mode.
var FilterPlainPrefix1 = function(s, tokenBeg) {
    Filter.apply(this, arguments);
};

// Returns true when the filter text occurs in `s` one character before
// `tokenBeg`.
FilterPlainPrefix1.prototype.match = function(s, tokenBeg) {
    var filterBeg = tokenBeg - 1;
    // A token at offset 0 leaves no room for the leading character; without
    // this guard -1 would compare equal to indexOf()'s "not found" result.
    if (filterBeg < 0) {
        return false;
    }
    return s.indexOf(this.s, filterBeg) === filterBeg;
};
133127
134- // Misses:
128+ /******************************************************************************/
135129
136- // ...
130+ // With a single wildcard, indexOf is best.
131+ // See: http://jsperf.com/regexp-vs-indexof-for-abp/4
137132
// Filter containing exactly one '*' wildcard. The filter text is split once,
// at construction time, into the literal segment left of the wildcard and the
// literal segment right of it.
//   s        : the filter text
//   tokenBeg : offset of the token inside `s`
//   tokenLen : forwarded to the base constructor
// Declared with `var`: a bare assignment creates an implicit global and
// throws a ReferenceError in strict mode.
var FilterSingleWildcard = function(s, tokenBeg, tokenLen) {
    Filter.apply(this, arguments);
    this.wcOffset = s.indexOf('*');
    this.lSegment = s.slice(0, this.wcOffset);
    this.rSegment = s.slice(this.wcOffset + 1);
};

// Returns true when the left segment occurs in `s` exactly where the filter
// would start (token positions aligned) and the right segment occurs anywhere
// at or after the end of the left segment.
FilterSingleWildcard.prototype.match = function(s, tokenBeg) {
    // Translate the token position in `s` into the position where the filter
    // itself has to begin.
    var filterBeg = tokenBeg - this.tokenBeg;
    if (filterBeg < 0) {
        return false;
    }
    if (s.indexOf(this.lSegment, filterBeg) !== filterBeg) {
        return false;
    }
    // The left segment is wcOffset characters long, so the wildcard starts
    // consuming right after it.
    return s.indexOf(this.rSegment, filterBeg + this.wcOffset) !== -1;
};
145145
146- // First segment must match exactly
147- var localLeftOffset = 0 ;
148- var localRightOffset = localStr . indexOf ( '*' , localLeftOffset ) ;
149- if ( s . indexOf ( localStr . slice ( localLeftOffset , localRightOffset ) , globalLeftOffset ) !== globalLeftOffset ) {
150- return false ;
151- }
152- globalLeftOffset += localRightOffset ;
153- localLeftOffset = localRightOffset + 1 ;
154-
155- var localLen = localStr . length ;
156- while ( localLeftOffset < localLen ) {
157- localRightOffset = localStr . indexOf ( '*' , localLeftOffset ) ;
158- if ( localRightOffset < 0 ) {
159- localRightOffset = localLen ;
160- }
161- globalLeftOffset = s . indexOf ( localStr . slice ( localLeftOffset , localRightOffset ) , globalLeftOffset ) ;
162- if ( globalLeftOffset < 0 ) {
163- return false ;
164- }
165- globalLeftOffset += localRightOffset - localLeftOffset ;
166- localLeftOffset = localRightOffset + 1 ;
167- }
168- return true ;
146+ /******************************************************************************/
147+
148+ // With many wildcards, a regex is best.
149+
// Filter containing several '*' wildcards, matched through a regular
// expression compiled once at construction time.
//   s        : the filter text
//   tokenBeg : offset of the token inside `s`
//   tokenLen : forwarded to the base constructor
// Declared with `var`: a bare assignment creates an implicit global and
// throws a ReferenceError in strict mode.
var FilterManyWildcards = function(s, tokenBeg, tokenLen) {
    Filter.apply(this, arguments);
    // Escape regex metacharacters (escaper adapted from the MDN Regular
    // Expressions guide, leaving '*' alone), then turn every '*' into '.*'.
    // The pattern is anchored at the start because match() feeds it the
    // string cut at the position where the filter must begin.
    var escaped = s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1');
    this.re = new RegExp('^' + escaped.replace(/\*/g, '.*'));
};

// Returns true when `s`, cut at the position where the filter must begin
// (token positions aligned), matches the compiled pattern.
FilterManyWildcards.prototype.match = function(s, tokenBeg) {
    var filterBeg = tokenBeg - this.tokenBeg;
    // A negative offset would make slice() count from the END of `s` and test
    // an unrelated suffix; the filter cannot start before the string does.
    if (filterBeg < 0) {
        return false;
    }
    return this.re.test(s.slice(filterBeg));
};
170161
171162/******************************************************************************/
172163
// Entry point for building a filter object from filter text: dispatches to
// the wildcard factory when `s` contains a '*' past its first character, and
// to the plain factory otherwise.
//   s        : the filter text
//   tokenBeg : offset of the token inside `s`
//   tokenLen : length of the token
// Returns whatever the selected factory returns.
// Declared with `var`: a bare assignment creates an implicit global and
// throws a ReferenceError in strict mode.
var FilterFactory = function(s, tokenBeg, tokenLen) {
    var wcOffset = s.indexOf('*');
    // NOTE(review): a '*' at offset 0 is routed to the plain factory, where it
    // is treated as a literal character. This presumably relies on leading
    // wildcards being stripped before this point; confirm against the caller.
    if (wcOffset > 0) {
        return FilterWildcardFactory(s, tokenBeg, tokenLen);
    }
    return FilterPlainFactory(s, tokenBeg, tokenLen);
};
171+
// Builds the cheapest plain (wildcard-free) filter for the given token
// offset: dedicated types exist for a token at offset 0 and at offset 1 of
// the filter text, and the generic plain filter covers every other offset.
//   s        : the filter text
//   tokenBeg : offset of the token inside `s`
//   tokenLen : length of the token
// Declared with `var`: a bare assignment creates an implicit global and
// throws a ReferenceError in strict mode.
var FilterPlainFactory = function(s, tokenBeg, tokenLen) {
    if (tokenBeg === 0) {
        return new FilterPlainPrefix0(s, 0, tokenLen);
    }
    if (tokenBeg === 1) {
        return new FilterPlainPrefix1(s, 1, tokenLen);
    }
    return new FilterPlain(s, tokenBeg, tokenLen);
};
181+
// Builds a wildcard filter: the regex-based type when the filter text holds
// two or more '*' characters, the indexOf-based type when it holds one.
//   s        : the filter text (expected to contain at least one '*')
//   tokenBeg : offset of the token inside `s`
//   tokenLen : length of the token
// Declared with `var`: a bare assignment creates an implicit global and
// throws a ReferenceError in strict mode.
var FilterWildcardFactory = function(s, tokenBeg, tokenLen) {
    // Look for a second '*' anywhere after the first one. A pattern that only
    // recognises wildcards separated by exactly one character would send
    // multi-wildcard filters to the single-wildcard type, which splits at the
    // first '*' only.
    var firstWildcard = s.indexOf('*');
    if (firstWildcard !== -1 && s.indexOf('*', firstWildcard + 1) !== -1) {
        // `new` is required: invoked as a plain function the constructor
        // returns undefined and the filter is lost.
        return new FilterManyWildcards(s, tokenBeg, tokenLen);
    }
    return new FilterSingleWildcard(s, tokenBeg, tokenLen);
};
180188
181189/******************************************************************************/
@@ -201,6 +209,7 @@ var badTokens = {
201209 'http' : true ,
202210 'https' : true ,
203211 'js' : true ,
212+ 'news' : true ,
204213 'www' : true
205214} ;
206215
@@ -275,6 +284,9 @@ var add = function(s) {
275284 var tokenEnd = reToken . lastIndex ;
276285
277286 filter = FilterFactory ( s , tokenBeg , token . length ) ;
287+ if ( ! filter ) {
288+ return false ;
289+ }
278290 filterDict [ s ] = filter ;
279291
280292 var prefixKey = s . substring ( tokenBeg - 1 , tokenBeg ) ;
@@ -296,12 +308,13 @@ var freeze = function() {
296308
297309/******************************************************************************/
298310
// Walks a chain of filters linked through their `next` property and reports
// whether any of them matches.
//   f        : first filter of the chain, or undefined for an empty chain
//   s        : the string to test
//   tokenBeg : offset in `s` of the token being tested, passed to each match()
// Returns true at the first filter whose match() is truthy, false once the
// chain is exhausted.
var matchStringToFilterChain = function(f, s, tokenBeg) {
    for (var candidate = f; candidate !== undefined; candidate = candidate.next) {
        if (candidate.match(s, tokenBeg)) {
            return true;
        }
    }
    return false;
};
0 commit comments