@@ -101,9 +101,8 @@ HeuristicBlocker.prototype = {
101101 * Use updateTrackerPrevalence for non-webRequest initiated bookkeeping.
102102 *
103103 * @param {Object } details request/response details
104- * @param {Boolean } check_for_cookie_share whether to check for cookie sharing
105104 */
106- heuristicBlockingAccounting : function ( details , check_for_cookie_share ) {
105+ heuristicBlockingAccounting : function ( details ) {
107106 // ignore requests that are outside a tabbed window
108107 if ( details . tabId < 0 || ! badger . isLearningEnabled ( details . tabId ) ) {
109108 return { } ;
@@ -144,119 +143,6 @@ HeuristicBlocker.prototype = {
144143 self . _recordPrevalence ( request_host , request_origin , tab_origin ) ;
145144 return { } ;
146145 }
147-
148- // check for cookie sharing iff this is an image in the top-level frame, and the request URL has parameters
149- if ( check_for_cookie_share && details . type == 'image' && details . frameId === 0 && details . url . indexOf ( '?' ) > - 1 ) {
150- // get all non-HttpOnly cookies for the top-level frame
151- // and pass those to the cookie-share accounting function
152- let tab_url = self . tabUrls [ details . tabId ] ;
153-
154- let config = {
155- url : tab_url
156- } ;
157- if ( badger . firstPartyDomainPotentiallyRequired ) {
158- config . firstPartyDomain = null ;
159- }
160-
161- chrome . cookies . getAll ( config , function ( cookies ) {
162- cookies = cookies . filter ( cookie => ! cookie . httpOnly ) ;
163- if ( cookies . length >= 1 ) {
164- self . pixelCookieShareAccounting ( tab_url , tab_origin , details . url , request_host , request_origin , cookies ) ;
165- }
166- } ) ;
167- }
168- } ,
169-
170- /**
171- * Checks for cookie sharing: requests to third-party domains that include
172- * high entropy data from first-party cookies (associated with the top-level
173- * frame). Only catches plain-text verbatim sharing (b64 encoding + the like
174- * defeat it). Assumes any long string that doesn't contain URL fragments or
175- * stopwords is an identifier. Doesn't catch cookie syncing (3rd party -> 3rd
176- * party), but most of those tracking cookies should be blocked anyway.
177- *
178- * @param details are those from onBeforeSendHeaders
179- * @param cookies are the result of chrome.cookies.getAll()
180- * @returns {* }
181- */
182- pixelCookieShareAccounting : function ( tab_url , tab_origin , request_url , request_host , request_origin , cookies ) {
183- let params = ( new URL ( request_url ) ) . searchParams ,
184- TRACKER_ENTROPY_THRESHOLD = 33 ,
185- MIN_STR_LEN = 8 ;
186-
187- for ( let p of params ) {
188- let key = p [ 0 ] ,
189- value = p [ 1 ] ;
190-
191- // the argument must be sufficiently long
192- if ( ! value || value . length < MIN_STR_LEN ) {
193- continue ;
194- }
195-
196- // check if this argument is derived from a high-entropy first-party cookie
197- for ( let cookie of cookies ) {
198- // the cookie value must be sufficiently long
199- if ( ! cookie . value || cookie . value . length < MIN_STR_LEN ) {
200- continue ;
201- }
202-
203- // find the longest common substring between this arg and the cookies
204- // associated with the document
205- let substrings = utils . findCommonSubstrings ( cookie . value , value ) || [ ] ;
206- for ( let s of substrings ) {
207- // ignore the substring if it's part of the first-party URL. sometimes
208- // content servers take the url of the page they're hosting content
209- // for as an argument. e.g.
210- // https://example-cdn.com/content?u=http://example.com/index.html
211- if ( tab_url . indexOf ( s ) != - 1 ) {
212- continue ;
213- }
214-
215- // elements of the user agent string are also commonly included in
216- // both cookies and arguments; e.g. "Mozilla/5.0" might be in both.
217- // This is not a special tracking risk since third parties can see
218- // this info anyway.
219- if ( navigator . userAgent . indexOf ( s ) != - 1 ) {
220- continue ;
221- }
222-
223- // Sometimes the entire url and then some is included in the
224- // substring -- the common string might be "https://example.com/:true"
225- // In that case, we only care about the information around the URL.
226- if ( s . indexOf ( tab_url ) != - 1 ) {
227- s = s . replace ( tab_url , "" ) ;
228- }
229-
230- // During testing we found lots of common values like "homepage",
231- // "referrer", etc. were being flagged as high entropy. This searches
232- // for a few of those and removes them before we go further.
233- let lower = s . toLowerCase ( ) ;
234- lowEntropyQueryValues . forEach ( function ( qv ) {
235- let start = lower . indexOf ( qv ) ;
236- if ( start != - 1 ) {
237- s = s . replace ( s . substring ( start , start + qv . length ) , "" ) ;
238- }
239- } ) ;
240-
241- // at this point, since we might have removed things, make sure the
242- // string is still long enough to bother with
243- if ( s . length < MIN_STR_LEN ) {
244- continue ;
245- }
246-
247- // compute the entropy of this common substring. if it's greater than
248- // our threshold, record the tracking action and exit the function.
249- let entropy = utils . estimateMaxEntropy ( s ) ;
250- if ( entropy > TRACKER_ENTROPY_THRESHOLD ) {
251- log ( "Found high-entropy cookie share from" , tab_origin , "to" , request_host ,
252- ":" , entropy , "bits\n cookie:" , cookie . name , '=' , cookie . value ,
253- "\n arg:" , key , "=" , value , "\n substring:" , s ) ;
254- this . _recordPrevalence ( request_host , request_origin , tab_origin ) ;
255- return ;
256- }
257- }
258- }
259- }
260146 } ,
261147
262148 /**
@@ -552,51 +438,6 @@ var lowEntropyCookieValues = {
552438 "zu" :8
553439} ;
554440
/**
 * Low-entropy fragments that commonly appear in both first-party cookie
 * values and third-party query string values (URL pieces, generic words,
 * JSON punctuation). They are stripped from candidate strings before the
 * entropy of a suspected shared identifier is estimated.
 *
 * Order matters: entries are removed one after another, so a longer entry
 * must come before any entry it contains (e.g. "https" before "http").
 * Each entry is listed once.
 */
const lowEntropyQueryValues = [
  "https",
  "http",
  "://",
  "%3A%2F%2F",
  "www",
  "url",
  "undefined",
  "impression",
  "session",
  "homepage",
  "client",
  "version",
  "business",
  "title",
  "get",
  "site",
  "name",
  "category",
  "account_id",
  "smartadserver",
  "front",
  "page",
  "view",
  "first",
  "visit",
  "platform",
  "language",
  "automatic",
  "disabled",
  "landing",
  "entertainment",
  "amazon",
  "official",
  "webvisor",
  "anonymous",
  "across",
  "narrative",
  "\":null",
  "\":false",
  "\":\"",
  "\",\"",
];
599-
600441/**
601442 * Extract cookies from onBeforeSendHeaders
602443 *
@@ -686,7 +527,7 @@ function startListeners() {
686527 extraInfoSpec . push ( 'extraHeaders' ) ;
687528 }
688529 chrome . webRequest . onBeforeSendHeaders . addListener ( function ( details ) {
689- return badger . heuristicBlocking . heuristicBlockingAccounting ( details , true ) ;
530+ return badger . heuristicBlocking . heuristicBlockingAccounting ( details ) ;
690531 } , { urls : [ "<all_urls>" ] } , extraInfoSpec ) ;
691532
692533 /**
@@ -705,7 +546,7 @@ function startListeners() {
705546 }
706547 }
707548 if ( hasSetCookie ) {
708- return badger . heuristicBlocking . heuristicBlockingAccounting ( details , false ) ;
549+ return badger . heuristicBlocking . heuristicBlockingAccounting ( details ) ;
709550 }
710551 } ,
711552 { urls : [ "<all_urls>" ] } , extraInfoSpec ) ;
0 commit comments