-
Notifications
You must be signed in to change notification settings - Fork 37
Expand file tree
/
Copy pathadBlockRustUtils.js
More file actions
256 lines (230 loc) · 11.7 KB
/
adBlockRustUtils.js
File metadata and controls
256 lines (230 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import { Engine, FilterSet, RuleTypes, uBlockResources } from 'adblock-rs'
import fs from 'fs-extra'
import path from 'path'
import util from '../lib/util.js'
// Root of the local uBlock sources; the three paths below are built from it.
const uBlockLocalRoot = 'submodules/uBlock'
// Passed to `uBlockResources` together with `uBlockRedirectEngine` when generating resources.
const uBlockWebAccessibleResources = path.join(uBlockLocalRoot, 'src/web_accessible_resources')
const uBlockRedirectEngine = path.join(uBlockLocalRoot, 'src/js/redirect-resources.js')
// Module that is dynamically imported for its `builtinScriptlets` export.
const uBlockScriptlets = path.join(uBlockLocalRoot, 'assets/resources/scriptlets.js')
// JSON array of Brave-maintained resources that is appended to the uBlock resources.
const braveResourcesUrl = 'https://raw.githubusercontent.com/brave/adblock-resources/master/dist/resources.json'
// JSON catalog of filter lists; fetched (once) by `getListCatalog`.
const listCatalogUrl = 'https://raw.githubusercontent.com/brave/adblock-resources/master/filter_lists/list_catalog.json'
// Component ID / public key pairs. They are only exported from this file; their consumers live elsewhere.
// NOTE(review): presumably these identify the packaged browser components - confirm against the callers.
const regionalCatalogComponentId = 'gkboaolpopklhgplhaaiboijnklogmbc'
const regionalCatalogPubkey = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsAnb1lw5UA1Ww4JIVE8PjKNlPogAdFoie+Aczk6ppQ4OrHANxz6oAk1xFuT2W3uhGOc3b/1ydIUMqOIdRFvMdEDUvKVeFyNAVXNSouFF7EBLEzcZfFtqoxeIbwEplVISUm+WUbsdVB9MInY3a4O3kNNuUijY7bmHzAqWMTrBfenw0Lqv38OfREXCiNq/+Jm/gt7FhyBd2oviXWEGp6asUwNavFnj8gQDGVvCf+dse8HRMJn00QH0MOypsZSWFZRmF08ybOu/jTiUo/TuIaHL1H8y9SR970LqsUMozu3ioSHtFh/IVgq7Nqy4TljaKsTE+3AdtjiOyHpW9ZaOkA7j2QIDAQAB'
const resourcesComponentId = 'mfddibmblmbccpadfndgakiopmmhebop'
const resourcesPubkey = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA7Qk6xtml8Siq8RD6cCbdJpArt0kMci82W/KYw3KR96y67MZAsKJa8rOV2WC1BIpW539Qgl5b5lMS04cjw+sSB7f2ZKM1WOqKNij24nvEKVubunP32u8tbjtzQk9VYNcM2MZMs330eqk7iuBRTvRViSMSeE3ymqp03HFpUGsdtjEBh1A5lroCg41eVnMn1I4GKPvuhT/Qc9Yem5gzXT/3n7H6vOGQ2dVBHz44mhgwtiDcsduh+Det6lCE2TgHOhHPdCewklgcoiNXP4zfXxfpPy1jbwb4w5KUnHSRelhfDnt+jI3jgHsD4IXdVNE5H5ZAnmcOJttbkRiT8kOVS0rJXwIDAQAB'
/**
 * Fetches a URL and resolves with its body parsed as JSON.
 *
 * @param url The URL to fetch from
 * @return a promise that resolves with the parsed JSON body, or rejects with an Error whose
 *   message names the URL and whose `cause` is the underlying failure (a failed fetch,
 *   a non-200 status, or a body that could not be parsed as JSON).
 */
const requestJSON = async (url) => {
  try {
    const response = await fetch(url)
    if (response.status !== 200) {
      throw new Error(`Error status ${response.status} ${response.statusText} returned for URL: ${url}`)
    }
    // Awaited (rather than returned directly) so that JSON parsing failures are wrapped below too.
    return await response.json()
  } catch (error) {
    // Keep the original error attached so its type and stack are not lost by the wrapping.
    throw new Error(`Error when fetching ${url}: ${error.message}`, { cause: error })
  }
}
// Wraps `fn` in a zero-argument function that calls `fn` on first use and then keeps
// returning the same result. Only a truthy result is retained: while the stored value is
// falsy, `fn` is called again on the next invocation.
const lazyInit = (fn) => {
  let cached
  return () => {
    if (!cached) {
      cached = fn()
    }
    return cached
  }
}
// Resolves with the parsed JSON found at `listCatalogUrl`. Wrapped in `lazyInit`, so
// repeated calls share the promise created by the first call.
const getListCatalog = lazyInit(async function fetchListCatalog () {
  const catalog = await requestJSON(listCatalogUrl)
  return catalog
})
// Legacy logic requires a distinction between default and regional lists.
// This can be removed once DAT support is no longer needed by iOS.
// An entry counts as a default list when both `default_enabled` and `hidden` are truthy;
// the result is the raw `&&` value of those two fields, not a coerced boolean.
const isDefaultList = (entry) => {
  if (!entry.default_enabled) {
    return entry.default_enabled
  }
  return entry.hidden
}
// Resolves with the entries of the list catalog for which `isDefaultList` is falsy.
const getRegionalLists = () => {
  const keepRegionalEntries = (catalog) => catalog.filter((entry) => !isDefaultList(entry))
  return getListCatalog().then(keepRegionalEntries)
}
// Wraps new template scriptlets with the older "numbered template arg" format and any required dependency code.
// The generated block declares the nine `{{N}}` placeholders, counts how many leading ones
// no longer equal their own placeholder text, emits `dependencyPrelude`, and then calls
// `fnString` with just that leading slice of arguments.
const wrapScriptletArgFormat = (fnString, dependencyPrelude) => {
  const wrapperLines = [
    '{',
    'const args = ["{{1}}", "{{2}}", "{{3}}", "{{4}}", "{{5}}", "{{6}}", "{{7}}", "{{8}}", "{{9}}"];',
    'let last_arg_index = 0;',
    'for (const arg_index in args) {',
    "if (args[arg_index] === '{{' + (Number(arg_index) + 1) + '}}') {",
    'break;',
    '}',
    'last_arg_index += 1;',
    '}',
    `${dependencyPrelude}`,
    `(${fnString})(...args.slice(0, last_arg_index))`,
    '}'
  ]
  return wrapperLines.join('\n')
}
/**
 * Builds the serialized resource list, at most once (the result promise is shared via `lazyInit`).
 *
 * The list is the concatenation, in this order, of:
 *   1. the result of `uBlockResources` for the local uBlock web accessible resources and redirect engine,
 *   2. the JSON array fetched from `braveResourcesUrl`,
 *   3. every uBlock builtin scriptlet whose name does not end in `.fn`, wrapped by
 *      `wrapScriptletArgFormat` together with the code of its (transitive) dependencies and base64-encoded.
 *
 * @return a promise that resolves with the resource list as a JSON string.
 *   Rejects with "Couldn't find dependency <name>" when a scriptlet names a dependency that is
 *   not present in `builtinScriptlets`.
 */
const generateResources = lazyInit(async () => {
  const { builtinScriptlets } = await import(path.join('..', uBlockScriptlets).toString())

  // Index every builtin (including the `.fn` entries) by name for dependency lookups.
  const dependencyMap = builtinScriptlets.reduce((map, entry) => {
    map[entry.name] = entry
    return map
  }, {})

  // Looks a dependency up before it is dereferenced, so that an unknown name produces the
  // intended error instead of a TypeError on `undefined`.
  const lookupDependency = (dep) => {
    const entry = dependencyMap[dep]
    if (entry === undefined || entry.fn === undefined) {
      throw new Error(`Couldn't find dependency ${dep}`)
    }
    return entry
  }

  const transformedUboBuiltins = builtinScriptlets.filter(s => !s.name.endsWith('.fn')).map(s => {
    // Bundle dependencies wherever needed. This causes some small duplication but makes each scriptlet fully self-contained.
    // Work on a copy: the loops below append to and reverse this array, and the scriptlet's
    // own `dependencies` array belongs to the imported module.
    const requiredDependencies = [...(s.dependencies ?? [])]
    // `for...of` also visits the elements appended during iteration, so this collects the
    // transitive dependencies, each at most once.
    for (const dep of requiredDependencies) {
      for (const recursiveDep of lookupDependency(dep).dependencies ?? []) {
        if (!requiredDependencies.includes(recursiveDep)) {
          requiredDependencies.push(recursiveDep)
        }
      }
    }

    // Emit the dependency code in reverse discovery order (last discovered first).
    let dependencyPrelude = ''
    for (const dep of requiredDependencies.reverse()) {
      dependencyPrelude += lookupDependency(dep).fn.toString() + '\n'
    }

    const content = Buffer.from(wrapScriptletArgFormat(s.fn.toString(), dependencyPrelude)).toString('base64')
    // NOTE(review): a previous comment here said that in Brave Browser bit 0 (i.e. 1 << 0)
    // signifies uBO resource permission, but no permission field is set on the entry below - confirm.
    return {
      name: s.name,
      aliases: s.aliases ?? [],
      kind: { mime: 'application/javascript' },
      content
    }
  })

  const resourceData = uBlockResources(
    uBlockWebAccessibleResources,
    uBlockRedirectEngine
  )
  const braveResources = await requestJSON(braveResourcesUrl)
  resourceData.push(...braveResources)
  resourceData.push(...transformedUboBuiltins)
  return JSON.stringify(resourceData)
})
/**
 * Writes the JSON string produced by `generateResources` to a file, encoded as UTF-8.
 *
 * @param outLocation The path of the file to write.
 * @return the promise returned by `fs.writeFile`.
 */
const generateResourcesFile = async (outLocation) => {
  const serializedResources = await generateResources()
  return fs.writeFile(outLocation, serializedResources, 'utf8')
}
// Removes Brave-specific scriptlet injections from non-Brave lists.
// A list whose title starts with "Brave " is returned unchanged. For any other title
// (including a missing one), every line containing "+js(brave-" is logged and dropped.
const enforceBraveDirectives = (title, data) => {
  const isBraveList = Boolean(title) && title.startsWith('Brave ')
  if (isBraveList) {
    return data
  }

  const keptLines = []
  for (const line of data.split('\n')) {
    if (line.includes('+js(brave-')) {
      console.log(`List ${title} attempted to include brave-specific directive: ${line}`)
      continue
    }
    keptLines.push(line)
  }
  return keptLines.join('\n')
}
/**
 * Parses the passed in filter rule data and serializes a data file to disk.
 *
 * @param filterRuleData An array of { title, format, data, includeRedirectUrls, ruleTypes } where format is one of `adblock-rust`'s supported filter parsing formats and data is a newline-separated list of such filters.
 * title is handed to `enforceBraveDirectives` together with data before the filters are added.
 * includeRedirectUrls is a boolean: https://github.com/brave/adblock-rust/pull/184. We only support redirect URLs on filter lists we maintain and trust.
 * ruleTypes was added with https://github.com/brave/brave-core-crx-packager/pull/298 and allows for { RuleTypes.ALL, RuleTypes.NETWORK_ONLY, RuleTypes.COSMETIC_ONLY }
 * @param outPath The path of the DAT file to create.
 * @param defaultRuleType The rule types used for entries whose own `ruleTypes` is falsy. Defaults to RuleTypes.ALL.
 */
const generateDataFileFromLists = (filterRuleData, outPath, defaultRuleType = RuleTypes.ALL) => {
  const filterSet = new FilterSet(false)
  for (const list of filterRuleData) {
    const parseOpts = {
      format: list.format,
      includeRedirectUrls: Boolean(list.includeRedirectUrls),
      ruleTypes: list.ruleTypes || defaultRuleType
    }
    const rules = enforceBraveDirectives(list.title, list.data).split('\n')
    filterSet.addFilters(rules, parseOpts)
  }

  const engine = new Engine(filterSet, true)
  const serialized = engine.serializeRaw()
  fs.writeFileSync(outPath, Buffer.from(serialized))
}
/**
 * Serializes the provided lists to disk in one file.
 *
 * Each list's data is passed through `enforceBraveDirectives` (with the list's title), the
 * results are joined with newlines, and the combined text is written to `outPath`.
 *
 * @param listBuffers An array of { title, data } objects.
 * @param outPath The path of the file to write.
 */
const generatePlaintextListFromLists = (listBuffers, outPath) => {
  const sanitizedLists = listBuffers.map((list) => enforceBraveDirectives(list.title, list.data))
  const combinedList = sanitizedLists.join('\n')
  fs.writeFileSync(outPath, combinedList)
}
/**
 * Returns a promise that resolves to the contents of each list from the entry's sources,
 * as an array of { title, format, data } in the same order as `entry.sources`.
 * A source without its own title uses the entry's title.
 * The promise rejects if any download fails or any of the lists fail the sanity check.
 */
const downloadListsForEntry = (entry) => {
  const downloads = entry.sources.map((source) => {
    console.log(`${entry.langs} ${source.url}...`)
    return util.fetchTextFromURL(source.url).then((data) => {
      const listBuffer = { title: source.title || entry.title, format: source.format, data }
      sanityCheckList(listBuffer)
      return listBuffer
    })
  })
  return Promise.all(downloads)
}
/**
 * A list of requests that should not be blocked unless the list has some serious issue.
 *
 * Each element is [requestUrl, sourceUrl, requestType].
 * `sanityCheckList` passes these three values, in this order, to `engine.check` and
 * rejects a list as soon as one of the requests yields a truthy result.
 */
const sampleUnblockedNetworkRequests = [
// real resources from a personal website which will never ship ads/trackers
['https://antonok.com', 'https://antonok.com', 'document'],
['https://antonok.com/style.css?h=9aba43f4dd864e1e4f3a', 'https://antonok.com', 'stylesheet'],
['https://antonok.com/res/font/icons.woff2?h=598bc5b2aa7cdaf390d9', 'https://antonok.com', 'font'],
['https://antonok.com/res/antonok-logo.svg', 'https://antonok.com', 'image'],
['https://antonok.com/processed_images/profile-2021-05-03.14f955fe3ab1a230.webp', 'https://antonok.com', 'image'],
// real resources from brave.com
['https://brave.com', 'https://brave.com', 'document'],
['https://brave.com/js/navigation.js', 'https://brave.com', 'script'],
['https://brave.com/static-assets/css-old/main.min.css', 'https://brave.com', 'stylesheet'],
['https://brave.com/static-assets/images/brave-logo-sans-text.svg', 'https://brave.com', 'image'],
// real resources from Brave's QA testing pages
['https://dev-pages.brave.software/filtering/index.html', 'https://dev-pages.brave.software/filtering/index.html', 'document'],
['https://dev-pages.brave.software/static/css/bootstrap.min.css', 'https://dev-pages.brave.software/filtering/index.html', 'stylesheet'],
['https://dev-pages.brave.software/static/css/site.css', 'https://dev-pages.brave.software/filtering/index.html', 'stylesheet'],
['https://dev-pages.brave.software/static/js/site.js', 'https://dev-pages.brave.software/filtering/index.html', 'script'],
['https://dev-pages.bravesoftware.com/filtering/index.html', 'https://dev-pages.bravesoftware.com/filtering/index.html', 'document'],
['https://dev-pages.bravesoftware.com/static/images/test.jpg', 'https://dev-pages.bravesoftware.com/filtering/additional-lists.html', 'image'],
// real resources from wikipedia.org
['https://en.wikipedia.org/wiki/Ad_blocking', 'https://en.wikipedia.org/wiki/Ad_blocking', 'document'],
['https://en.wikipedia.org/w/load.php?lang=en&modules=codex-search-styles%7Cext.cite.styles%7Cext.pygments%2CwikimediaBadges%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cskins.vector.icons%2Cstyles%7Cskins.vector.zebra.styles%7Cwikibase.client.init&only=styles&skin=vector-2022', 'https://en.wikipedia.org/wiki/Ad_blocking', 'stylesheet'],
// NOTE(review): `skin=vector-202` below differs from `vector-2022` in the URL above - confirm whether that is intentional.
['https://en.wikipedia.org/w/load.php?lang=en&modules=startup&only=scripts&raw=1&skin=vector-202', 'https://en.wikipedia.org/wiki/Ad_blocking', 'script'],
['https://en.wikipedia.org/static/images/mobile/copyright/wikipedia-wordmark-en.svg', 'https://en.wikipedia.org/wiki/Ad_blocking', 'image'],
['https://en.wikipedia.org/static/images/icons/wikipedia.png', 'https://en.wikipedia.org/wiki/Ad_blocking', 'image'],
// hypothetical embeddings on example.com
['https://antonok.com', 'https://example.com', 'subdocument'],
['https://brave.com', 'https://example.com', 'subdocument'],
['https://en.wikipedia.org/wiki/Ad_blocking', 'https://example.com', 'subdocument']
]
/*
 * Throws an error if the list is blocking any of the requests from `sampleUnblockedNetworkRequests`.
 *
 * The list's newline-separated `data` is loaded into a fresh engine using the given `format`;
 * the sample requests are then checked in order and the first one with a truthy result is
 * reported in the error message together with the list's `title`.
 */
const sanityCheckList = ({ title, format, data }) => {
  const filterSet = new FilterSet()
  filterSet.addFilters(data.split('\n'), { format })
  const engine = new Engine(filterSet)

  const blockedRequest = sampleUnblockedNetworkRequests.find(
    ([requestUrl, sourceUrl, requestType]) => engine.check(requestUrl, sourceUrl, requestType)
  )
  if (blockedRequest !== undefined) {
    throw new Error(`${title} failed sanity check for ${blockedRequest}. Check for corrupted list contents.`)
  }
}
export {
downloadListsForEntry,
regionalCatalogComponentId,
regionalCatalogPubkey,
resourcesComponentId,
resourcesPubkey,
sanityCheckList,
generateDataFileFromLists,
generatePlaintextListFromLists,
generateResourcesFile,
getListCatalog,
getRegionalLists
}