@@ -145,6 +145,7 @@ HTTPSB.loadUbiquitousWhitelists = function() {
145145
146146HTTPSB . mergeUbiquitousWhitelist = function ( details ) {
147147 var ubiquitousWhitelist = this . ubiquitousWhitelist ;
148+ var reAdblockFilter = / ^ [ ^ a - z 0 - 9 : ] | [ ^ a - z 0 - 9 ] $ | [ ^ a - z 0 - 9 _ : . - ] / ;
148149 var raw = details . content . toLowerCase ( ) ;
149150 var rawEnd = raw . length ;
150151 var lineBeg = 0 ;
@@ -153,16 +154,23 @@ HTTPSB.mergeUbiquitousWhitelist = function(details) {
153154 while ( lineBeg < rawEnd ) {
154155 lineEnd = raw . indexOf ( '\n' , lineBeg ) ;
155156 if ( lineEnd < 0 ) {
156- lineEnd = rawEnd ;
157+ lineEnd = raw . indexOf ( '\r' , lineBeg ) ;
158+ if ( lineEnd < 0 ) {
159+ lineEnd = rawEnd ;
160+ }
157161 }
158- line = raw . slice ( lineBeg , lineEnd ) ;
162+ line = raw . slice ( lineBeg , lineEnd ) . trim ( ) ;
159163 lineBeg = lineEnd + 1 ;
160164 pos = line . indexOf ( '#' ) ;
161165 if ( pos >= 0 ) {
162166 line = line . slice ( 0 , pos ) ;
163167 }
164168 line = line . trim ( ) ;
165- if ( ! line . length ) {
169+ if ( line === '' ) {
170+ continue ;
171+ }
172+ // Ignore whatever appears to be an Adblock filter
173+ if ( reAdblockFilter . test ( line ) ) {
166174 continue ;
167175 }
168176 ubiquitousWhitelist . add ( line ) ;
@@ -295,52 +303,81 @@ HTTPSB.mergeUbiquitousBlacklist = function(details) {
295303 // Useful references:
296304 // https://adblockplus.org/en/filter-cheatsheet
297305 // https://adblockplus.org/en/filters
298- var adblock = ( / ^ \[ a d b l o c k + p l u s \ + \d \. \d \] / i) . test ( raw ) ;
299306 var abpFilters = this . userSettings . parseAllABPFilters ? this . abpFilters : null ;
300- var hostFromAdblockFilter = function ( s ) {
301- var matches = s . match ( / ^ \| \| ( [ a - z 0 - 9 . - ] + ) \^ $ / ) ;
302- if ( matches && matches . length > 1 ) {
303- return matches [ 1 ] ;
304- }
305- return '' ;
306- } ;
307-
308307 var ubiquitousBlacklist = this . ubiquitousBlacklist ;
309308 var thisListCount = 0 ;
310309 var thisListUsedCount = 0 ;
311- var localhostRegex = / ( ^ | \b ) ( l o c a l h o s t \. l o c a l d o m a i n | l o c a l h o s t | l o c a l | b r o a d c a s t h o s t | 0 \. 0 \. 0 \. 0 | 1 2 7 \. 0 \. 0 \. 1 | : : 1 | f e 8 0 : : 1 % l o 0 ) ( \b | $ ) / g;
312- var lineBeg = 0 ;
313- var lineEnd ;
314- var line , pos ;
310+ var reLocalhost = / ( ^ | \s ) ( l o c a l h o s t \. l o c a l d o m a i n | l o c a l h o s t | l o c a l | b r o a d c a s t h o s t | 0 \. 0 \. 0 \. 0 | 1 2 7 \. 0 \. 0 \. 1 | : : 1 | f e 8 0 : : 1 % l o 0 ) (? = \s | $ ) / g;
311+ var reAdblockFilter = / ^ [ ^ a - z 0 - 9 : ] | [ ^ a - z 0 - 9 ] $ | [ ^ a - z 0 - 9 _ : . - ] / ;
312+ var reAdblockHostFilter = / ^ \| \| ( [ a - z 0 - 9 . - ] + [ a - z 0 - 9 ] ) \^ ? $ / ;
313+ var reAsciiSegment = / [ \x21 - \x7e ] + / ;
314+ var matches ;
315+ var lineBeg = 0 , lineEnd ;
316+ var line , c ;
317+
315318 while ( lineBeg < rawEnd ) {
316319 lineEnd = raw . indexOf ( '\n' , lineBeg ) ;
317320 if ( lineEnd < 0 ) {
318- lineEnd = rawEnd ;
321+ lineEnd = raw . indexOf ( '\r' , lineBeg ) ;
322+ if ( lineEnd < 0 ) {
323+ lineEnd = rawEnd ;
324+ }
319325 }
320- line = raw . slice ( lineBeg , lineEnd ) ;
326+
327+ // rhill 2014-04-18: The trim is important here, as without it there
328+ // could be a lingering `\r` which would cause problems in the
329+ // following parsing code.
330+ line = raw . slice ( lineBeg , lineEnd ) . trim ( ) ;
321331 lineBeg = lineEnd + 1 ;
322332
323- // rhill 2014-01-22: Transpose possible Adblock Plus-filter syntax
324- // into a plain hostname if possible.
325- // Useful reference: https://adblockplus.org/en/filter-cheatsheet#blocking2
326- if ( adblock ) {
327- if ( abpFilters && abpFilters . add ( line ) ) {
328- continue ;
329- }
330- line = hostFromAdblockFilter ( line ) ;
333+ // Strip comments
334+ c = line . charAt ( 0 ) ;
335+ if ( c === '#' || c === '!' || c === '[' ) {
336+ continue ;
331337 }
338+ line = line . replace ( / \s + # .* $ / , '' ) ;
332339
333- pos = line . indexOf ( '#' ) ;
334- if ( pos >= 0 ) {
335- line = line . slice ( 0 , pos ) ;
336- }
337340 // https://github.com/gorhill/httpswitchboard/issues/15
338- // Ensure localhost et al. don't end up on the read-only blacklist.
339- line = line . replace ( localhostRegex , ' ' ) ;
341+ // Ensure localhost et al. don't end up in the ubiquitous blacklist.
342+ line = line . replace ( reLocalhost , '' ) ;
343+
340344 line = line . trim ( ) ;
341- if ( ! line . length ) {
345+
346+ // The filter is whatever sequence of printable ASCII characters without
347+ // whitespace
348+ matches = reAsciiSegment . exec ( line ) ;
349+ if ( ! matches || matches . length === 0 ) {
342350 continue ;
343351 }
352+
353+ // Bypass anomalies
354+ if ( matches [ 0 ] !== line ) {
355+ console . error ( '"%s": "%s" !== "%s"' , details . path , matches [ 0 ] , line ) ;
356+ continue ;
357+ }
358+
359+ line = matches [ 0 ] ;
360+
361+ // Likely an ABP filter?
362+ if ( reAdblockFilter . test ( line ) ) {
363+ if ( abpFilters !== null ) {
364+ if ( abpFilters . add ( line ) ) {
365+ continue ;
366+ }
367+ }
368+ // rhill 2014-01-22: Transpose possible Adblock Plus-filter syntax
369+ // into a plain hostname if possible.
370+ matches = reAdblockHostFilter . exec ( line ) ;
371+ if ( ! matches || matches . length < 2 ) {
372+ continue ;
373+ }
374+ line = matches [ 1 ] ;
375+ }
376+
377+ if ( line === '' ) {
378+ continue ;
379+ }
380+
344381 thisListCount ++ ;
345382 if ( ubiquitousBlacklist . add ( line ) ) {
346383 thisListUsedCount ++ ;
0 commit comments