Skip to content

Commit e683f5f

Browse files
More robust detection of private networks for high traffic websites (#178)
1 parent 3e01b15 commit e683f5f

File tree

2 files changed

+133
-11
lines changed

2 files changed

+133
-11
lines changed

reporting/src/http.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ function headerOverrideViaWebRequestAPI({ url, headers, requestType }) {
190190
details.method !== 'GET' ||
191191
!matchesWebRequestApiType(requestType, details.type)
192192
) {
193-
// does that match the request that we intended to trigger
193+
// does not match the request that we intended to trigger
194194
return {};
195195
}
196196

reporting/src/network.js

Lines changed: 132 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,123 @@ const MINUTE = 60 * SECOND;
1616
const HOUR = 60 * MINUTE;
1717
const DAY = 24 * HOUR;
1818

19+
// This list is not intended to be complete. For missing entries, the
20+
// DnsResolver class will fall back to dynamic mappings learned from
21+
// observing network requests (hostname -> IP).
22+
//
23+
// Why include explicit mappings at all? Because it provides stronger
24+
// guarantees during the bootstrapping phase. In a web extension context,
25+
// where no DNS resolution API is available, we otherwise could not guarantee
26+
// that the critical IP addresses would always be resolvable.
27+
const WELL_KNOWN_HOSTNAME_IS_PRIVATE_NETWORK = {
28+
// list of private networks (completeness not required):
29+
'127.0.0.1': true,
30+
'localhost': true,
31+
'fritz.box': true,
32+
33+
// (generated: top 100 public domains ranked by traffic):
34+
'www.google.com': false,
35+
'www.fiverr.com': false,
36+
'www.youtube.com': false,
37+
'www.amazon.com': false,
38+
'www.reddit.com': false,
39+
'www.facebook.com': false,
40+
'mail.google.com': false,
41+
'docs.google.com': false,
42+
'x.com': false,
43+
'accounts.google.com': false,
44+
'www.instagram.com': false,
45+
'www.bing.com': false,
46+
'www.linkedin.com': false,
47+
'login.microsoftonline.com': false,
48+
'en.wikipedia.org': false,
49+
'github.com': false,
50+
'www.amazon.de': false,
51+
'www.ebay.com': false,
52+
'www.twitch.tv': false,
53+
'www.amazon.co.jp': false,
54+
'chatgpt.com': false,
55+
'www.amazon.fr': false,
56+
'www.msn.com': false,
57+
'news.yahoo.co.jp': false,
58+
'old.reddit.com': false,
59+
'www.amazon.co.uk': false,
60+
'www.roblox.com': false,
61+
'drive.google.com': false,
62+
'rule34.xxx': false,
63+
'www.pornhub.com': false,
64+
'www.imdb.com': false,
65+
'xhamster.com': false,
66+
'www.xvideos.com': false,
67+
'e-hentai.org': false,
68+
'www.paypal.com': false,
69+
'www.amazon.ca': false,
70+
'www.espn.com': false,
71+
'www.bbc.co.uk': false,
72+
'www.kleinanzeigen.de': false,
73+
'www.nexusmods.com': false,
74+
'steamcommunity.com': false,
75+
'www.bilibili.com': false,
76+
'www.ebay.co.uk': false,
77+
'allegro.pl': false,
78+
'meet.google.com': false,
79+
'www.aliexpress.com': false,
80+
'chaturbate.com': false,
81+
'www.yahoo.co.jp': false,
82+
'www.nytimes.com': false,
83+
'www.canva.com': false,
84+
'www.etsy.com': false,
85+
'nhentai.net': false,
86+
'www.ozon.ru': false,
87+
'www.theguardian.com': false,
88+
'www.amazon.it': false,
89+
'hitomi.la': false,
90+
'www.neopets.com': false,
91+
'outlook.live.com': false,
92+
'supjav.com': false,
93+
'store.steampowered.com': false,
94+
'www.deviantart.com': false,
95+
'calendar.google.com': false,
96+
'www.ecosia.org': false,
97+
'mail.yahoo.com': false,
98+
'de.fiverr.com': false,
99+
'letterboxd.com': false,
100+
'login.live.com': false,
101+
'www.fmkorea.com': false,
102+
'outlook.office.com': false,
103+
'news.google.com': false,
104+
'statics.teams.cdn.office.net': false,
105+
'gall.dcinside.com': false,
106+
'citizenfreepress.com': false,
107+
'duckduckgo.com': false,
108+
'www.amazon.es': false,
109+
'www.xnxx.com': false,
110+
'www.imagefap.com': false,
111+
'www.pixiv.net': false,
112+
'www.ikea.com': false,
113+
'www.netflix.com': false,
114+
'www.ebay.de': false,
115+
'imgsrc.ru': false,
116+
'www.dailymail.co.uk': false,
117+
'www.marktplaats.nl': false,
118+
'www.foxnews.com': false,
119+
'auctions.yahoo.co.jp': false,
120+
'www.booking.com': false,
121+
'game.granbluefantasy.jp': false,
122+
'www.erome.com': false,
123+
'f95zone.to': false,
124+
'www.cardmarket.com': false,
125+
'www.chess.com': false,
126+
'www.vinted.fr': false,
127+
'www.discogs.com': false,
128+
'www.ancestry.com': false,
129+
'app.hubspot.com': false,
130+
'1337x.to': false,
131+
'www.upwork.com': false,
132+
'search.yahoo.co.jp': false,
133+
'www.patreon.com': false,
134+
};
135+
19136
// Simple function to detect IP addresses that are non-public.
20137
// Local to the machine or link-only (belonging to a local network).
21138
//
@@ -61,14 +178,6 @@ export function isLocalIP(ip) {
61178
return false;
62179
}
63180

64-
// There are two unsolved problems:
65-
// - What to do if there is no IP resolution cached yet?
66-
// (Currently, it assumes that the page is private; but most of the
67-
// time this assumption will be wrong, so it looks overly conservative.
68-
// Perhaps, extending the API to return three values: yes/no/unknown
69-
// could help. But it will push the complexity to the caller.)
70-
// - Perhaps adding all seen URLs from private IPs to the bloom filter
71-
// could be a solution
72181
export class DnsResolver {
73182
constructor() {
74183
this.dns = new Map();
@@ -81,9 +190,22 @@ export class DnsResolver {
81190
}
82191

83192
isPrivateHostname(hostname) {
84-
if (hostname === 'localhost') {
85-
return true;
193+
// For a limited set of hostnames, we know how to classify them.
194+
// By design, this will never cover all websites.
195+
const isPrivate = WELL_KNOWN_HOSTNAME_IS_PRIVATE_NETWORK[hostname];
196+
if (isPrivate !== undefined) {
197+
return isPrivate;
86198
}
199+
200+
// For the long tail, fall back to checking the IP address. Since we lack a
201+
// DNS resolution API, we use mappings built from observing network requests.
202+
//
203+
// Note: Since this is a heuristic, there remains the question of how to deal
204+
// with hostnames that we have not seen before. That case should be rare
205+
// enough, since we work with hostnames that originate from visited websites.
206+
// Yet if no such mapping exists, we have to guess. Assuming a hostname is
207+
// private by default would be overly conservative; instead, we default to
208+
// public unless proven otherwise.
87209
const entry = this.dns.get(hostname);
88210
return entry?.ip && isLocalIP(entry.ip);
89211
}

0 commit comments

Comments
 (0)