@@ -79,23 +79,107 @@ var reToken = /[%0-9A-Za-z]{2,}/g;
7979
8080/******************************************************************************/
8181
82- var FilterEntry = function ( s , tokenBeg , tokenLen ) {
/**
 * Plain (wildcard-free) filter record.
 *
 * @param {string} s         Filter text.
 * @param {number} tokenBeg  Index within `s` at which the filter's token starts.
 * @param {number} tokenLen  Length of that token.
 */
var Filter = function(s, tokenBeg, tokenLen) {
    // Filter text and the location of its token inside that text.
    this.s = s;
    this.tokenBeg = tokenBeg;
    this.tokenLen = tokenLen;
    // Link slot, left unset at construction time.
    this.next = void 0;
};
8888
89- /******************************************************************************/
90-
91- FilterEntry . prototype . match = function ( s , tokenBeg ) {
/**
 * Tests whether this filter's text occurs in `s` at the position implied by
 * lining up the filter's token with the token found in `s`.
 *
 * @param {string} s         String to test.
 * @param {number} tokenBeg  Index in `s` of the token being lined up.
 * @returns {boolean} True when the filter text sits exactly at the aligned offset.
 */
Filter.prototype.match = function(s, tokenBeg) {
    // rhill 2014-03-05: Benchmarking shows that's the fastest way to do this.
    // Offset in `s` where the filter text has to begin once tokens are aligned.
    var expectedAt = tokenBeg - this.tokenBeg;
    var foundAt = s.indexOf(this.s, expectedAt);
    return foundAt === expectedAt;
};
9694
9795/******************************************************************************/
9896
97+ // Example:
98+ // given: "__/abcde-ghijk*mnopqr"
99+ // token: "abcde"
100+ // this.tokenBeg: 3
101+ // this.tokenLen: 5
102+ // Align origins of global and local strings
103+ // Then loop for each segment
104+ // local l offset = 0
105+ // local r offset = indexOf('*') = 14
106+ // So test local segment "__/abcde-ghijk" against external segment with
107+ // offset transposed into global coords.
108+ // Then repeat for next plain segment, after skipping wildcard. Etc.
109+
// This needs more work, especially benchmarks against a regex-based approach.
// My expectation, though, is that indexOf() is faster for filters which have
// a single wildcard (the large majority), while a regex may only pay off for
// filters with two, three or more wildcards (both approaches carry overhead).

// Real-world cases are collected here for use in a jsperf benchmark:
116+
117+ // Hits:
118+
119+ // l.yimg.com*/img/badge-
120+ // http://mail.yimg.com/nq/assets/micro2/v47/img/badge-sprites.png
121+
122+ // yimg.com/ss/rapid-*.js
123+ // http://l.yimg.com/ss/rapid-3.11.js
124+
125+ // arstechnica.net*/sponsor-
126+ // http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/images/smartstream/ibm/sponsor-msg.png
127+
128+ // msn.com*/report.js
129+ // http://blu.stj.s-msn.com/br/csl/js/D8CC944FD882D1B64561551E54F9CF3B/report.js
130+
131+ // /cnwk.1d/*/apex.js
132+ // http://cn.cbsimg.net/cnwk.1d/Aud/javascript/gamespot/apex.js?_=1394157449540
133+
134+ // Misses:
135+
136+ // ...
137+
/**
 * Filter whose text contains one or more `*` wildcards.
 *
 * Initialises the same fields as `Filter` by invoking it on the new instance,
 * and links the prototype chain so that instances are also `instanceof Filter`.
 *
 * @param {string} s         Filter text, including its `*` wildcard(s).
 * @param {number} tokenBeg  Index within `s` at which the filter's token starts.
 * @param {number} tokenLen  Length of that token.
 */
var FilterWildcard = function(s, tokenBeg, tokenLen) {
    // Explicit arguments rather than the `arguments` object.
    Filter.call(this, s, tokenBeg, tokenLen);
};

// Must run before any method is attached to FilterWildcard.prototype, since
// it replaces the default prototype object.
FilterWildcard.prototype = Object.create(Filter.prototype, {
    constructor: {
        value: FilterWildcard,
        writable: true,
        configurable: true
    }
});
141+
/**
 * Tests a wildcard filter against `s`.
 *
 * The plain segment of the filter which holds the token is anchored in `s` at
 * the position implied by lining up the two tokens. Segments to its left are
 * then searched backwards and segments to its right forwards, each `*`
 * standing for any run of characters (possibly empty).
 *
 * When the token lies in the first segment this is the classic
 * "first segment exact, remaining segments in order" test. Anchoring on the
 * token's own segment also makes the test valid when the token sits after a
 * wildcard, where a fixed origin alignment would wrongly assume that every
 * `*` consumed exactly one character.
 *
 * @param {string} s         String to test.
 * @param {number} tokenBeg  Index in `s` of the token being lined up --
 *                           presumably where the caller found this filter's
 *                           token; confirm against the caller.
 * @returns {boolean} True when every plain segment is found, in order.
 */
FilterWildcard.prototype.match = function(s, tokenBeg) {
    var pattern = this.s;
    var patternLen = pattern.length;

    // Bounds [segBeg, segEnd) of the plain segment containing the token.
    var segBeg = pattern.lastIndexOf('*', this.tokenBeg) + 1;
    var segEnd = pattern.indexOf('*', this.tokenBeg);
    if ( segEnd < 0 ) {
        segEnd = patternLen;
    }

    // That segment must sit exactly where the token alignment puts it.
    var anchor = tokenBeg - (this.tokenBeg - segBeg);
    if ( s.indexOf(pattern.slice(segBeg, segEnd), anchor) !== anchor ) {
        return false;
    }

    var piece;

    // Segments left of the anchor: walk right-to-left, each one has to end at
    // or before the start of the segment matched just before it.
    var globalLimit = anchor;
    var localEnd = segBeg - 1; // index of the '*' closing the segment, -1 if none
    var localBeg, startMax;
    while ( localEnd >= 0 ) {
        localBeg = localEnd > 0 ? pattern.lastIndexOf('*', localEnd - 1) + 1 : 0;
        piece = pattern.slice(localBeg, localEnd);
        startMax = globalLimit - piece.length;
        // Guard: lastIndexOf() clamps a negative start to 0, which would
        // accept a segment overlapping what has already been matched.
        if ( startMax < 0 ) {
            return false;
        }
        globalLimit = s.lastIndexOf(piece, startMax);
        if ( globalLimit < 0 ) {
            return false;
        }
        localEnd = localBeg - 1;
    }

    // Segments right of the anchor: walk left-to-right, each one has to start
    // at or after the end of the segment matched just before it.
    var globalPos = anchor + (segEnd - segBeg);
    var localLeft = segEnd + 1;
    var localRight;
    while ( localLeft < patternLen ) {
        localRight = pattern.indexOf('*', localLeft);
        if ( localRight < 0 ) {
            localRight = patternLen;
        }
        globalPos = s.indexOf(pattern.slice(localLeft, localRight), globalPos);
        if ( globalPos < 0 ) {
            return false;
        }
        globalPos += localRight - localLeft;
        localLeft = localRight + 1;
    }
    return true;
};
170+
171+ /******************************************************************************/
172+
/**
 * Creates the filter object suited to the filter text: a `FilterWildcard`
 * when the text contains a `*`, a plain `Filter` otherwise.
 *
 * Declared with `var` so that it does not leak as an implicit global (an
 * undeclared assignment throws a ReferenceError in strict-mode code).
 *
 * @param {string} s         Filter text.
 * @param {number} tokenBeg  Index within `s` at which the filter's token starts.
 * @param {number} tokenLen  Length of that token.
 * @returns {Filter|FilterWildcard} The new filter instance.
 */
var FilterFactory = function(s, tokenBeg, tokenLen) {
    if ( s.indexOf('*') < 0 ) {
        return new Filter(s, tokenBeg, tokenLen);
    }
    return new FilterWildcard(s, tokenBeg, tokenLen);
};
180+
181+ /******************************************************************************/
182+
99183// Reset all, thus reducing to a minimum memory footprint of the context.
100184
101185var reset = function ( ) {
@@ -151,17 +235,17 @@ var add = function(s) {
151235 s = s . replace ( / \* \* + / g, '*' ) ;
152236
153237 // Ignore rules with a wildcard in the middle
154- if ( reWildcardRule . test ( s ) ) {
155- return false ;
156- }
238+ // if ( reWildcardRule.test(s) ) {
239+ // return false;
240+ // }
157241
158242 // Ignore hostname rules, these will be taken care of by HTTPSB.
159243 if ( reHostnameRule . test ( s ) ) {
160244 return false ;
161245 }
162246
163- // Remove pipes
164- s = s . replace ( / ^ \| \| / , '' ) ;
247+ // Remove leading and trailing pipes
248+ s = s . replace ( / ^ \| + | \| + $ / , '' ) ;
165249
166250 // Remove leading and trailing wildcards
167251 var pos = 0 ;
@@ -190,7 +274,7 @@ var add = function(s) {
190274 var tokenBeg = matches . index ;
191275 var tokenEnd = reToken . lastIndex ;
192276
193- filter = new FilterEntry ( s , tokenBeg , token . length ) ;
277+ filter = FilterFactory ( s , tokenBeg , token . length ) ;
194278 filterDict [ s ] = filter ;
195279
196280 var prefixKey = s . substring ( tokenBeg - 1 , tokenBeg ) ;
0 commit comments