Skip to content
This repository was archived by the owner on Nov 15, 2017. It is now read-only.

Commit 024113a

Browse files
committed
support for wildcards in ABP filters, more ABP lists, other details
1 parent 1e12a08 commit 024113a

File tree

6 files changed

+114
-16
lines changed

6 files changed

+114
-16
lines changed

info.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ <h2 id="generic-stats">Generic stats</h2>
6767
<li>Local cookies removed: <span id="cookieRemovedCounter"></span>
6868
<li><a href="http://diveintohtml5.info/storage.html">Local storages</a> emptied: <span id="localStorageRemovedCounter"></span>
6969
<li>Browser caches cleared: <span id="browserCacheClearedCounter"></span>
70-
<li>Requests blocked by Adblock+ complex filters: <span id="abpHitCount"></span>
70+
<li>Requests blocked by Adblock+ complex filters: <span id="abpHitCount">?</span> (<span id="abpHitRate">?</span>% of all blocked requests)
7171
</ul>
7272
</div>
7373

js/abp-filters.js

Lines changed: 94 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,23 +79,107 @@ var reToken = /[%0-9A-Za-z]{2,}/g;
7979

8080
/******************************************************************************/
8181

82-
var FilterEntry = function(s, tokenBeg, tokenLen) {
82+
/**
 * A plain (wildcard-free) filter, stored together with the location of one
 * of its tokens.
 *
 * @param {string} s - The filter text.
 * @param {number} tokenBeg - Offset within `s` at which the token starts.
 * @param {number} tokenLen - Length of the token.
 */
var Filter = function(s, tokenBeg, tokenLen) {
    this.s = s;
    this.tokenBeg = tokenBeg;
    this.tokenLen = tokenLen;
    // NOTE(review): presumably links to another filter sharing the same
    // lookup key -- confirm against the code which populates it.
    this.next = undefined;
};

/**
 * Test whether the filter text occurs in `s` at the position which lines up
 * the filter's token with the token found at `tokenBeg` in `s`.
 *
 * @param {string} s - The string to test (e.g. a request URL).
 * @param {number} tokenBeg - Offset within `s` at which the token was found.
 * @returns {boolean} true if the filter text is found at the aligned offset.
 */
Filter.prototype.match = function(s, tokenBeg) {
    // rhill 2014-03-05: Benchmarking shows that's the fastest way to do this.
    var filterBeg = tokenBeg - this.tokenBeg;
    // A negative offset means the filter would have to start before the
    // beginning of `s`, which can never match. The explicit guard matters
    // for an offset of exactly -1: indexOf() returns -1 for "not found",
    // which would otherwise compare equal to the offset and report a hit.
    if ( filterBeg < 0 ) {
        return false;
    }
    return s.indexOf(this.s, filterBeg) === filterBeg;
};
9694

9795
/******************************************************************************/
9896

97+
// Example:
98+
// given: "__/abcde-ghijk*mnopqr"
99+
// token: "abcde"
100+
// this.tokenBeg: 3
101+
// this.tokenLen: 5
102+
// Align origins of global and local strings
103+
// Then loop for each segment
104+
// local l offset = 0
105+
// local r offset = indexOf('*') = 14
106+
// So test local segment "__/abcde-ghijk" against external segment with
107+
// offset transposed into global coords.
108+
// Then repeat for next plain segment, after skipping wildcard. Etc.
109+
110+
// This needs more work, and especially benchmarks against regex.
111+
// My expectation though is that using indexOf() is faster for filters
112+
// which have a single wildcard (the large majority), while a regex may only
113+
// pay off for filters with 2, 3 or more wildcards (both approaches carry some overhead).
114+
115+
// I will collate here real cases to use in a jsperf benchmark:
116+
117+
// Hits:
118+
119+
// l.yimg.com*/img/badge-
120+
// http://mail.yimg.com/nq/assets/micro2/v47/img/badge-sprites.png
121+
122+
// yimg.com/ss/rapid-*.js
123+
// http://l.yimg.com/ss/rapid-3.11.js
124+
125+
// arstechnica.net*/sponsor-
126+
// http://cdn.arstechnica.net/wp-content/themes/arstechnica/assets/images/smartstream/ibm/sponsor-msg.png
127+
128+
// msn.com*/report.js
129+
// http://blu.stj.s-msn.com/br/csl/js/D8CC944FD882D1B64561551E54F9CF3B/report.js
130+
131+
// /cnwk.1d/*/apex.js
132+
// http://cn.cbsimg.net/cnwk.1d/Aud/javascript/gamespot/apex.js?_=1394157449540
133+
134+
// Misses:
135+
136+
// ...
137+
138+
/**
 * A filter whose text contains one or more '*' wildcards. Takes the same
 * arguments as `Filter` and delegates field initialisation to it.
 *
 * @param {string} s - The filter text, containing at least one '*'.
 * @param {number} tokenBeg - Offset within `s` at which the token starts.
 * @param {number} tokenLen - Length of the token.
 */
var FilterWildcard = function(s, tokenBeg, tokenLen) {
    Filter.apply(this, arguments);
};

/**
 * Test whether the wildcard filter matches `s`, given that the filter's
 * token was found at `tokenBeg` in `s`.
 *
 * The filter text is treated as a sequence of plain segments separated by
 * '*'. The segment which holds the token is the only one whose position in
 * `s` is known, so it is checked first at its aligned offset. Segments to
 * its left must then be found, in order, somewhere before it; segments to
 * its right must be found, in order, somewhere after it.
 *
 * @param {string} s - The string to test (e.g. a request URL).
 * @param {number} tokenBeg - Offset within `s` at which the token was found.
 * @returns {boolean} true if every segment is found in the right order.
 */
FilterWildcard.prototype.match = function(s, tokenBeg) {
    var filterStr = this.s;
    var filterLen = filterStr.length;

    // Locate the segment [anchorBeg, anchorEnd) which holds the token.
    var anchorBeg = 0;
    var anchorEnd = filterStr.indexOf('*');
    if ( anchorEnd < 0 ) {
        anchorEnd = filterLen;
    }
    while ( anchorEnd < this.tokenBeg && anchorEnd < filterLen ) {
        anchorBeg = anchorEnd + 1;
        anchorEnd = filterStr.indexOf('*', anchorBeg);
        if ( anchorEnd < 0 ) {
            anchorEnd = filterLen;
        }
    }

    // The anchor segment must match exactly at its aligned position. A
    // negative position can never match; it must be rejected explicitly
    // because indexOf() returning -1 ("not found") would otherwise compare
    // equal to a position of -1.
    var anchorPos = tokenBeg - (this.tokenBeg - anchorBeg);
    if ( anchorPos < 0 ) {
        return false;
    }
    if ( s.indexOf(filterStr.slice(anchorBeg, anchorEnd), anchorPos) !== anchorPos ) {
        return false;
    }

    // Segments left of the anchor, walked right to left. Taking the
    // rightmost possible occurrence each time leaves the most room for the
    // segments still to be placed further left.
    var leftLimit = anchorPos;
    var leftEnd = anchorBeg - 1;
    var leftBeg, leftSeg, leftFrom;
    while ( leftEnd > 0 ) {
        leftBeg = filterStr.lastIndexOf('*', leftEnd - 1) + 1;
        leftSeg = filterStr.slice(leftBeg, leftEnd);
        leftFrom = leftLimit - leftSeg.length;
        if ( leftFrom < 0 ) {
            return false;
        }
        leftLimit = s.lastIndexOf(leftSeg, leftFrom);
        if ( leftLimit < 0 ) {
            return false;
        }
        leftEnd = leftBeg - 1;
    }

    // Segments right of the anchor, walked left to right, each one searched
    // for from the end of the previously matched segment.
    var rightCursor = anchorPos + (anchorEnd - anchorBeg);
    var rightBeg = anchorEnd + 1;
    var rightEnd;
    while ( rightBeg < filterLen ) {
        rightEnd = filterStr.indexOf('*', rightBeg);
        if ( rightEnd < 0 ) {
            rightEnd = filterLen;
        }
        rightCursor = s.indexOf(filterStr.slice(rightBeg, rightEnd), rightCursor);
        if ( rightCursor < 0 ) {
            return false;
        }
        rightCursor += rightEnd - rightBeg;
        rightBeg = rightEnd + 1;
    }
    return true;
};
170+
171+
/******************************************************************************/
172+
173+
/**
 * Create the appropriate filter object for a filter string: a `Filter` when
 * the string contains no '*', a `FilterWildcard` otherwise.
 *
 * Declared with `var` so that it does not become an implicit global (an
 * assignment to an undeclared name throws a ReferenceError in strict mode).
 *
 * @param {string} s - The filter text.
 * @param {number} tokenBeg - Offset within `s` at which the token starts.
 * @param {number} tokenLen - Length of the token.
 * @returns {Filter|FilterWildcard} The newly created filter object.
 */
var FilterFactory = function(s, tokenBeg, tokenLen) {
    if ( s.indexOf('*') < 0 ) {
        return new Filter(s, tokenBeg, tokenLen);
    }
    return new FilterWildcard(s, tokenBeg, tokenLen);
};
180+
181+
/******************************************************************************/
182+
99183
// Reset all, thus reducing to a minimum memory footprint of the context.
100184

101185
var reset = function() {
@@ -151,17 +235,17 @@ var add = function(s) {
151235
s = s.replace(/\*\*+/g, '*');
152236

153237
// Ignore rules with a wildcard in the middle
154-
if ( reWildcardRule.test(s) ) {
155-
return false;
156-
}
238+
// if ( reWildcardRule.test(s) ) {
239+
// return false;
240+
// }
157241

158242
// Ignore hostname rules, these will be taken care of by HTTPSB.
159243
if ( reHostnameRule.test(s) ) {
160244
return false;
161245
}
162246

163-
// Remove pipes
164-
s = s.replace(/^\|\|/, '');
247+
// Remove leading and trailing pipes
248+
s = s.replace(/^\|+|\|+$/, '');
165249

166250
// Remove leading and trailing wildcards
167251
var pos = 0;
@@ -190,7 +274,7 @@ var add = function(s) {
190274
var tokenBeg = matches.index;
191275
var tokenEnd = reToken.lastIndex;
192276

193-
filter = new FilterEntry(s, tokenBeg, token.length);
277+
filter = FilterFactory(s, tokenBeg, token.length);
194278
filterDict[s] = filter;
195279

196280
var prefixKey = s.substring(tokenBeg - 1, tokenBeg);

js/background.js

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ var HTTPSB = {
6868
'assets/thirdparties/hosts-file.net/ad-servers': {},
6969
'assets/thirdparties/someonewhocares.org/hosts/hosts': {},
7070

71+
// Various
72+
'assets/thirdparties/winhelp2002.mvps.org/hosts.txt': {},
73+
7174
// From here on, any new list is 'off' by default
7275
// Adblock Plus
7376
'assets/thirdparties/easylist-downloads.adblockplus.org/easylist.txt': {},
@@ -77,8 +80,18 @@ var HTTPSB = {
7780
// Fanboy
7881
'assets/thirdparties/www.fanboy.co.nz/enhancedstats.txt': {},
7982

80-
// Various
81-
'assets/thirdparties/winhelp2002.mvps.org/hosts.txt': {},
83+
'assets/thirdparties/easylist-downloads.adblockplus.org/easylistgermany.txt': { off: true },
84+
'assets/thirdparties/easylist-downloads.adblockplus.org/easylistitaly.txt': { off: true },
85+
'assets/thirdparties/easylist-downloads.adblockplus.org/easylistdutch.txt': { off: true },
86+
'assets/thirdparties/easylist-downloads.adblockplus.org/liste_fr.txt': { off: true },
87+
'assets/thirdparties/adblock-chinalist.googlecode.com/svn/trunk/adblock.txt': { off: true },
88+
'assets/thirdparties/stanev.org/abp/adblock_bg.txt': { off: true },
89+
'assets/thirdparties/indonesianadblockrules.googlecode.com/hg/subscriptions/abpindo.txt': { off: true },
90+
'assets/thirdparties/liste-ar-adblock.googlecode.com/hg/Liste_AR.txt': { off: true },
91+
'assets/thirdparties/adblock-czechoslovaklist.googlecode.com/svn/filters.txt': { off: true },
92+
'assets/thirdparties/gitorious.org/adblock-latvian/adblock-latvian/raw/5f5fc83eb1a2d0e97df9a5c382febaa651511757:lists/latvian-list.txt': { off: true },
93+
'assets/thirdparties/raw.github.com/AdBlockPlusIsrael/EasyListHebrew/master/EasyListHebrew.txt': { off: true },
94+
8295
'assets/thirdparties/hosts-file.net/hosts.txt': { off: true }
8396
},
8497

js/info.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ function renderStats() {
155155
'#refererHeaderFoiledCounter': httpsb.refererHeaderFoiledCounter,
156156
'#browserCacheClearedCounter': httpsb.browserCacheClearedCounter,
157157
'#abpHitCount': httpsb.abpHitCount,
158+
'#abpHitRate': (httpsb.abpHitCount * 100 / httpsb.requestStats.blocked.all).toFixed(1),
158159
'#blockedAllCount': requestStats.blocked.all,
159160
'#blockedMainFrameCount': blockedStats.main_frame,
160161
'#blockedCookieCount': blockedStats.cookie,

js/storage.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ function loadRemoteBlacklistsHandler(store) {
251251
'loadUbiquitousBlacklistCompleted',
252252
null,
253253
onLoadUbiquitousBlacklistCompleted,
254-
100,
254+
1000,
255255
false
256256
);
257257
}
@@ -366,7 +366,7 @@ function mergeBlacklistedHosts(details) {
366366
'loadUbiquitousBlacklistCompleted',
367367
null,
368368
onLoadUbiquitousBlacklistCompleted,
369-
100,
369+
1000,
370370
false
371371
);
372372
}

js/traffic.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ function onBeforeRequestHandler(details) {
242242
}
243243

244244
// Block using ABP filters?
245-
if ( !block ) {
245+
if ( !block && !isWebPage ) {
246246
block = httpsb.abpFilters.matchString(requestURL);
247247
if ( block ) {
248248
httpsb.abpHitCount += 1;

0 commit comments

Comments
 (0)