@@ -37,6 +37,9 @@ function HeuristicBlocker(pbStorage) {
3737 // impossible to attribute to a tab.
3838 this . tabOrigins = { } ;
3939 this . tabUrls = { } ;
40+
41+ // in-memory cache for community learning
42+ this . previouslySharedTrackers = new Set ( ) ;
4043}
4144
4245HeuristicBlocker . prototype = {
@@ -105,8 +108,10 @@ HeuristicBlocker.prototype = {
105108 */
106109 // TODO more like heuristicLearningFromCookies ... check DESIGN doc
107110 heuristicBlockingAccounting : function ( details , check_for_cookie_share ) {
111+ let tab_id = details . tabId ;
112+
108113 // ignore requests that are outside a tabbed window
109- if ( details . tabId < 0 || ! badger . isLearningEnabled ( details . tabId ) ) {
114+ if ( tab_id < 0 || ! badger . isLearningEnabled ( tab_id ) ) {
110115 return { } ;
111116 }
112117
@@ -115,12 +120,12 @@ HeuristicBlocker.prototype = {
115120
116121 // if this is a main window request, update tab data and quit
117122 if ( details . type == "main_frame" ) {
118- self . tabOrigins [ details . tabId ] = window . getBaseDomain ( request_host ) ;
119- self . tabUrls [ details . tabId ] = details . url ;
123+ self . tabOrigins [ tab_id ] = window . getBaseDomain ( request_host ) ;
124+ self . tabUrls [ tab_id ] = details . url ;
120125 return { } ;
121126 }
122127
123- let tab_base = self . tabOrigins [ details . tabId ] ;
128+ let tab_base = self . tabOrigins [ tab_id ] ;
124129 if ( ! tab_base ) {
125130 return { } ;
126131 }
@@ -152,15 +157,15 @@ HeuristicBlocker.prototype = {
152157
153158 // check if there are tracking cookies
154159 if ( hasCookieTracking ( details ) ) {
155- self . _recordPrevalence ( request_host , request_base , tab_base ) ;
160+ self . _recordPrevalence ( request_host , request_base , tab_base , tab_id , constants . TRACKER_TYPES . COOKIE ) ;
156161 return { } ;
157162 }
158163
159164 // check for cookie sharing iff this is an image in the top-level frame, and the request URL has parameters
160165 if ( check_for_cookie_share && details . type == 'image' && details . frameId === 0 && details . url . indexOf ( '?' ) > - 1 ) {
161166 // get all non-HttpOnly cookies for the top-level frame
162167 // and pass those to the cookie-share accounting function
163- let tab_url = self . tabUrls [ details . tabId ] ;
168+ let tab_url = self . tabUrls [ tab_id ] ;
164169
165170 let config = {
166171 url : tab_url
@@ -174,7 +179,7 @@ HeuristicBlocker.prototype = {
174179 if ( cookies . length >= 1 ) {
175180 // TODO refactor with new URI() above?
176181 let searchParams = ( new URL ( details . url ) ) . searchParams ;
177- self . pixelCookieShareAccounting ( tab_url , tab_base , searchParams , request_host , request_base , cookies ) ;
182+ self . pixelCookieShareAccounting ( tab_id , tab_url , tab_base , searchParams , request_host , request_base , cookies ) ;
178183 }
179184 } ) ;
180185 }
@@ -192,7 +197,7 @@ HeuristicBlocker.prototype = {
192197 * Doesn't catch cookie syncing (3rd party -> 3rd party),
193198 * but most of those tracking cookies should be blocked anyway.
194199 */
195- pixelCookieShareAccounting : function ( tab_url , tab_base , searchParams , request_host , request_base , cookies ) {
200+ pixelCookieShareAccounting : function ( tab_id , tab_url , tab_base , searchParams , request_host , request_base , cookies ) {
196201 const TRACKER_ENTROPY_THRESHOLD = 33 ,
197202 MIN_STR_LEN = 8 ;
198203
@@ -263,7 +268,7 @@ HeuristicBlocker.prototype = {
263268 log ( "Found high-entropy cookie share from" , tab_base , "to" , request_host ,
264269 ":" , entropy , "bits\n cookie:" , cookie . name , '=' , cookie . value ,
265270 "\n arg:" , key , "=" , value , "\n substring:" , s ) ;
266- this . _recordPrevalence ( request_host , request_base , tab_base ) ;
271+ this . _recordPrevalence ( request_host , request_base , tab_base , tab_id , constants . TRACKER_TYPES . COOKIE_SHARE ) ;
267272 return ;
268273 }
269274 }
@@ -277,8 +282,10 @@ HeuristicBlocker.prototype = {
277282 * @param {String } tracker_fqdn The fully qualified domain name of the tracker
278283 * @param {String } tracker_base Base domain of the third party tracker
279284 * @param {String } site_base Base domain of page where tracking occurred
285+ * @param {Integer } tab_id the ID of the tab the user is in
286+ * @param {String } tracker_type the kind of tracking action that was observed
280287 */
281- updateTrackerPrevalence : function ( tracker_fqdn , tracker_base , site_base ) {
288+ updateTrackerPrevalence : function ( tracker_fqdn , tracker_base , site_base , tab_id , tracker_type ) {
282289 // abort if we already made a decision for this fqdn
283290 let action = this . storage . getAction ( tracker_fqdn ) ;
284291 if ( action != constants . NO_TRACKING && action != constants . ALLOW ) {
@@ -288,7 +295,9 @@ HeuristicBlocker.prototype = {
288295 this . _recordPrevalence (
289296 tracker_fqdn ,
290297 tracker_base ,
291- site_base
298+ site_base ,
299+ tab_id ,
300+ tracker_type
292301 ) ;
293302 } ,
294303
@@ -304,8 +313,10 @@ HeuristicBlocker.prototype = {
304313 * @param {String } tracker_fqdn The FQDN of the third party tracker
305314 * @param {String } tracker_base Base domain of the third party tracker
306315 * @param {String } site_base Base domain of page where tracking occurred
316+ * @param {Integer } tab_id the ID of the tab the user is in
317+ * @param {String } tracker_type the kind of tracking action that was observed
307318 */
308- _recordPrevalence : function ( tracker_fqdn , tracker_base , site_base ) {
319+ _recordPrevalence : function ( tracker_fqdn , tracker_base , site_base , tab_id , tracker_type ) {
309320 // GDPR Consent Management Provider
310321 // https://github.com/EFForg/privacybadger/pull/2245#issuecomment-545545717
311322 if ( tracker_base == "consensu.org" ) {
@@ -330,22 +341,81 @@ HeuristicBlocker.prototype = {
330341 return ;
331342 }
332343
344+ // If community learning is enabled, queue up a request to the EFF server
345+ if ( badger . isCommunityLearningEnabled ( tab_id ) ) {
346+ let page_fqdn = ( new URI ( this . tabUrls [ tab_id ] ) ) . host ;
347+ self . shareTrackerInfo ( page_fqdn , tracker_fqdn , tracker_type ) ;
348+ }
349+
350+ // If local learning is enabled,
333351 // record that we've seen this tracker on this domain
334- firstParties . push ( site_base ) ;
335- snitchMap . setItem ( tracker_base , firstParties ) ;
336-
337- // ALLOW indicates this is a tracker still below TRACKING_THRESHOLD
338- // (vs. NO_TRACKING for resources we haven't seen perform tracking yet).
339- // see https://github.com/EFForg/privacybadger/pull/1145#discussion_r96676710
340- self . storage . setupHeuristicAction ( tracker_fqdn , constants . ALLOW ) ;
341- self . storage . setupHeuristicAction ( tracker_base , constants . ALLOW ) ;
342-
343- // (cookie)block the tracker if it has been seen on multiple first party domains
344- if ( firstParties . length >= constants . TRACKING_THRESHOLD ) {
345- log ( "blocklisting" , tracker_fqdn ) ;
346- self . blocklistOrigin ( tracker_base , tracker_fqdn ) ;
352+ if ( badger . isLocalLearningEnabled ( tab_id ) ) {
353+ firstParties . push ( site_base ) ;
354+ snitchMap . setItem ( tracker_base , firstParties ) ;
355+
356+ // ALLOW indicates this is a tracker still below TRACKING_THRESHOLD
357+ // (vs. NO_TRACKING for resources we haven't seen perform tracking yet).
358+ // see https://github.com/EFForg/privacybadger/pull/1145#discussion_r96676710
359+ self . storage . setupHeuristicAction ( tracker_fqdn , constants . ALLOW ) ;
360+ self . storage . setupHeuristicAction ( tracker_base , constants . ALLOW ) ;
361+
362+ // (cookie)block the tracker if it has been seen on multiple first party domains
363+ if ( firstParties . length >= constants . TRACKING_THRESHOLD ) {
364+ log ( "blocklisting" , tracker_fqdn ) ;
365+ self . blocklistOrigin ( tracker_base , tracker_fqdn ) ;
366+ }
367+ }
368+ } ,
369+
370+ /**
371+ * Share information about a tracker for community learning
372+ */
373+ shareTrackerInfo : function ( page_host , tracker_host , tracker_type ) {
374+ // Share a random sample of trackers we observe
375+ if ( Math . random ( ) < constants . CL_PROBABILITY ) {
376+ // check if we've shared this tracker recently
377+ // note that this check comes after checking against the snitch map
378+ let tr_str = page_host + '+' + tracker_host + '+' + tracker_type ;
379+ if ( this . previouslySharedTrackers . has ( tr_str ) ) {
380+ return ;
381+ }
382+
383+ // add this entry to the cache
384+ this . previouslySharedTrackers . add ( tr_str ) ;
385+
386+ // if the cache gets too big, cut it in half
387+ if ( this . previouslySharedTrackers . size > constants . CL_CACHE_SIZE ) {
388+ this . previouslySharedTrackers = new Set (
389+ // An array created from the set will have all of its entries ordered
390+ // by when they were added
391+ Array . from ( this . previouslySharedTrackers ) . slice (
392+ // keep the most recent half of the cache entries
393+ Math . floor ( constants . CL_CACHE_SIZE / 2 )
394+ )
395+ ) ;
396+ }
397+
398+ // now make the request to the database server
399+ setTimeout ( function ( ) {
400+ fetch ( "http://localhost:8080" , {
401+ method : "POST" ,
402+ body : JSON . stringify ( {
403+ tracker_data : {
404+ page_host : page_host ,
405+ tracker_host : tracker_host ,
406+ tracker_type : tracker_type ,
407+ }
408+ } )
409+ } ) . then ( res => {
410+ if ( ! res . ok ) {
411+ console . log ( "tracking action logging failed:" , res ) ;
412+ }
413+ } ) ;
414+ // share info after a random delay, to reduce network load on browser
415+ } , Math . floor ( Math . random ( ) * constants . MAX_CL_WAIT_TIME ) ) ;
347416 }
348417 }
418+
349419} ;
350420
351421
0 commit comments