Skip to content

Commit bd38aa8

Browse files
committed
Add JavaScript option
1 parent 7380ef6 commit bd38aa8

20 files changed

+325
-88
lines changed

res/templates/javaScript.html

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<script id="mwoffliner-jsConfigVars">
2+
document.documentElement.classList.replace('client-nojs', 'client-js');
3+
RLCONF = __ARTICLE_CONFIGVARS__;
4+
RLSTATE = __ARTICLE_CSS_STATE__;
5+
RLPAGEMODULES = __ARTICLE_JS_MODULES__;
6+
7+
(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"});}];});});
8+
</script>

res/templates/pageFallback.html

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
<head>
44
<meta charset="UTF-8" />
55
<title></title>
6+
__ARTICLE_JAVASCRIPT__
67
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
78
<link rel="icon" type="image/png" href="__RELATIVE_FILE_PATH____RES_DIR__/favicon.png" />
89
__ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_BEFORE_META__
10+
__ARTICLE_JS_STARTUP__
911
<meta name="ResourceLoaderDynamicStyles" content="" />
1012
__ARTICLE_CSS_AFTER_META__
1113
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____MW_DIR__/site.styles.css" />
12-
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____MW_DIR__/noscript.css" />
14+
__ARTICLE_CSS_NOSCRIPT__
1315
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____RES_DIR__/footer.css" />
1416
</head>
1517

@@ -24,6 +26,6 @@ <h1 id="firstHeading" class="firstHeading mw-first-heading" __ARTICLE_FIRST_HEAD
2426
<div id="mw-content-text" class="mw-body-content"></div>
2527
</div>
2628
</div>
27-
__ARTICLE_CONFIGVARS_LIST__ __ARTICLE_JS_LIST__
29+
__ARTICLE_JS_LIST__
2830
</body>
2931
</html>

res/templates/pageVector2022.html

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
<head>
44
<meta charset="UTF-8" />
55
<title></title>
6+
__ARTICLE_JAVASCRIPT__
67
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
78
<link rel="icon" type="image/png" href="__RELATIVE_FILE_PATH____RES_DIR__/favicon.png" />
89
__ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_BEFORE_META__
10+
__ARTICLE_JS_STARTUP__
911
<meta name="ResourceLoaderDynamicStyles" content="" />
1012
__ARTICLE_CSS_AFTER_META__
1113
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____MW_DIR__/site.styles.css" />
12-
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____MW_DIR__/noscript.css" />
14+
__ARTICLE_CSS_NOSCRIPT__
1315
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____RES_DIR__/footer.css" />
1416
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____RES_DIR__/vector-2022.css" />
1517
</head>
@@ -32,6 +34,6 @@ <h1 id="firstHeading" class="firstHeading mw-first-heading" __ARTICLE_FIRST_HEAD
3234
</div>
3335
</div>
3436
</div>
35-
__ARTICLE_CONFIGVARS_LIST__ __ARTICLE_JS_LIST__
37+
__ARTICLE_JS_LIST__
3638
</body>
3739
</html>

res/templates/pageVectorLegacy.html

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
<head>
44
<meta charset="UTF-8" />
55
<title></title>
6+
__ARTICLE_JAVASCRIPT__
67
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
78
<link rel="icon" type="image/png" href="__RELATIVE_FILE_PATH____RES_DIR__/favicon.png" />
89
__ARTICLE_CANONICAL_LINK__ __ARTICLE_CSS_BEFORE_META__
10+
__ARTICLE_JS_STARTUP__
911
<meta name="ResourceLoaderDynamicStyles" content="" />
1012
__ARTICLE_CSS_AFTER_META__
1113
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____MW_DIR__/site.styles.css" />
12-
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____MW_DIR__/noscript.css" />
14+
__ARTICLE_CSS_NOSCRIPT__
1315
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____RES_DIR__/footer.css" />
1416
<link rel="stylesheet" type="text/css" href="__RELATIVE_FILE_PATH____RES_DIR__/vector.css" />
1517
</head>
@@ -25,6 +27,6 @@ <h1 id="firstHeading" class="firstHeading mw-first-heading" __ARTICLE_FIRST_HEAD
2527
<div id="mw-content-text" class="mw-body-content"></div>
2628
</div>
2729
</div>
28-
__ARTICLE_CONFIGVARS_LIST__ __ARTICLE_JS_LIST__
30+
__ARTICLE_JS_LIST__
2931
</body>
3032
</html>

src/Downloader.ts

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ import { config } from './config.js'
33
import { contains, normalizeMwResponse, DB_ERROR, WEAK_ETAG_REGEX, stripHttpFromUrl, isBitmapImageMimeType, isWebpCandidateImageMimeType } from './util/index.js'
44
import { Readable } from 'stream'
55
import deepmerge from 'deepmerge'
6-
import * as domino from 'domino'
76
import { default as imagemin } from 'imagemin'
87
import imageminAdvPng from 'imagemin-advpng'
98
import type { BackoffStrategy } from 'backoff'
@@ -31,6 +30,7 @@ import RestApiURLDirector from './util/builders/url/rest-api.director.js'
3130
import { Renderer } from './renderers/abstract.renderer.js'
3231
import { findFirstMatchingRule, renderDownloadError } from './error.manager.js'
3332
import RedisStore from './RedisStore.js'
33+
import { extractJsConfigVars } from './util/articles.js'
3434

3535
const imageminOptions = new Map()
3636
imageminOptions.set('default', new Map())
@@ -59,6 +59,7 @@ interface DownloaderOpts {
5959
optimisationCacheUrl: string
6060
s3?: S3
6161
webp: boolean
62+
trustedJs?: string[]
6263
backoffOptions?: BackoffOptions
6364
insecure?: boolean
6465
}
@@ -123,6 +124,7 @@ class Downloader {
123124
private _arrayBufferRequestOptions: AxiosRequestConfig
124125
private _jsonRequestOptions: AxiosRequestConfig
125126
private _streamRequestOptions: AxiosRequestConfig
127+
public trustedJs: string[] = []
126128
public wikimediaMobileJsDependenciesList: string[] = []
127129
public wikimediaMobileStyleDependenciesList: string[] = []
128130

@@ -163,13 +165,14 @@ class Downloader {
163165
return this._apiUrlDirector
164166
}
165167

166-
set init({ uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions, insecure }: DownloaderOpts) {
168+
set init({ uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, trustedJs = config.output.mw.js_trusted.slice(), backoffOptions, insecure }: DownloaderOpts) {
167169
this.reset()
168170
this.uaString = uaString
169171
this._speed = speed
170172
this._requestTimeout = reqTimeout
171173
this.optimisationCacheUrl = optimisationCacheUrl
172174
this._webp = webp
175+
this.trustedJs = trustedJs
173176
this.s3 = s3
174177
this._apiUrlDirector = new ApiURLDirector(MediaWiki.actionApiUrl.href)
175178
this.insecure = insecure
@@ -271,6 +274,7 @@ class Downloader {
271274
this._requestTimeout = undefined
272275
this.optimisationCacheUrl = undefined
273276
this._webp = false
277+
this.trustedJs = []
274278
this.s3 = undefined
275279
this._apiUrlDirector = undefined
276280
this.insecure = false
@@ -849,7 +853,7 @@ class Downloader {
849853

850854
/* If article is missing (for example because it just has been deleted) */
851855
if (articleData.error.code === 'missingtitle') {
852-
return { jsConfigVars: '', jsDependenciesList: [], styleDependenciesList: [] }
856+
return { jsConfigVars: {}, jsDependenciesList: [], styleDependenciesList: [] }
853857
}
854858

855859
/* Something went wrong in modules retrieval at app level (no HTTP error) */
@@ -870,7 +874,7 @@ class Downloader {
870874
logger.info(`Js dependencies of ${title} : ${jsDependenciesList}`)
871875
logger.info(`Css dependencies of ${title} : ${styleDependenciesList}`)
872876

873-
const jsConfigVars = Downloader.extractJsConfigVars(headhtml)
877+
const jsConfigVars = extractJsConfigVars(headhtml)
874878

875879
// Download mobile page dependencies only once
876880
if ((await MediaWiki.hasWikimediaMobileApi()) && this.wikimediaMobileJsDependenciesList.length === 0 && this.wikimediaMobileStyleDependenciesList.length === 0) {
@@ -904,27 +908,6 @@ class Downloader {
904908
stream.on('end', () => resolve(Buffer.concat(chunks)))
905909
})
906910
}
907-
908-
public static extractJsConfigVars(headhtml: string) {
909-
let jsConfigVars = ''
910-
911-
// Saving, as a js module, the jsconfigvars that are set in the header of a wikipedia page
912-
// the script below extracts the config with a regex executed on the page header returned from the api
913-
const scriptTags = domino.createDocument(`${headhtml}</body></html>`).getElementsByTagName('script')
914-
const regex = /mw\.config\.set\(\{.*?\}\);/gm
915-
for (let i = 0; i < scriptTags.length; i += 1) {
916-
if (scriptTags[i].text.includes('mw.config.set')) {
917-
jsConfigVars = regex.exec(scriptTags[i].text)[0] || ''
918-
jsConfigVars = `(window.RLQ=window.RLQ||[]).push(function() {${jsConfigVars}});`
919-
} else if (scriptTags[i].text.includes('RLCONF') || scriptTags[i].text.includes('RLSTATE') || scriptTags[i].text.includes('RLPAGEMODULES')) {
920-
jsConfigVars = scriptTags[i].text
921-
}
922-
}
923-
924-
jsConfigVars = jsConfigVars.replace('nosuchaction', 'view') // to replace the wgAction config that is set to 'nosuchaction' from api but should be 'view'
925-
926-
return jsConfigVars
927-
}
928911
}
929912

930913
export { Downloader as DownloaderClass }

src/Gadgets.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class Gadgets {
3737
const module = gadget.metadata.module
3838
if (module.peers && module.peers.length) {
3939
// Only JS Gadgets can have peers
40-
cssGadgets.concat(module.peers)
40+
cssGadgets.push(...module.peers)
4141
return jsGadgets.push(gadget.id)
4242
}
4343
if (module.scripts && module.scripts.length) return jsGadgets.push(gadget.id)

src/Templates.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ const htmlRedirectTemplateCode = () => {
4646
return readTemplate(config.output.templates.htmlRedirect)
4747
}
4848

49+
const javaScriptTemplateCode = () => {
50+
return readTemplate(config.output.templates.javaScript)
51+
}
52+
4953
const articleListHomeTemplate = readTemplate(config.output.templates.articleListHomeTemplate)
5054

5155
export {
@@ -59,6 +63,7 @@ export {
5963
htmlVector2022TemplateCode,
6064
htmlFallbackTemplateCode,
6165
htmlRedirectTemplateCode,
66+
javaScriptTemplateCode,
6267
articleListHomeTemplate,
6368
categoriesTemplate,
6469
subCategoriesTemplate,

src/config.ts

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,33 @@ const config = {
101101
],
102102
css_simplified: [
103103
'site.styles', // always needed
104-
'noscript', // recommended until we solve https://github.com/openzim/mwoffliner/issues/2310
104+
'noscript', // in case client has JS disabled
105105
],
106106
js_simplified: [
107107
// base JS scripts always needed / never returned on API calls
108-
'startup',
108+
'jquery',
109+
'mediawiki.base',
110+
],
111+
js_trusted: [
112+
// JS modules and their dependencies trusted to not request any external resources
113+
'jquery',
114+
'mediawiki.base',
115+
'jquery.tablesorter',
116+
'jquery.makeCollapsible',
117+
'mediawiki.page.ready',
118+
'mediawiki.page.gallery',
119+
'ext.cite.ux-enhancements',
120+
'ext.pygments.view',
121+
'ext.Tabber',
122+
'ext.tabberNeue',
123+
'ext.tmh.player',
124+
'ext.cargo.main',
109125
],
126+
js_dynamic_dependencies: {
127+
'mediawiki.page.ready': ['jquery.tablesorter', 'jquery.makeCollapsible'],
128+
'ext.tmh.player': ['ext.tmh.player.inline', 'ext.tmh.player.dialog'],
129+
'ext.cargo.main': ['oojs-ui-core'],
130+
},
110131
},
111132

112133
// Output paths for storing stuff
@@ -136,6 +157,8 @@ const config = {
136157

137158
subPages: './templates/subpages.html',
138159

160+
javaScript: './templates/javaScript.html',
161+
139162
articleListHomeTemplate: './templates/article_list_home.html',
140163

141164
/* License footer template code */

src/mwoffliner.lib.ts

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import {
2323
MAX_CPU_CORES,
2424
MIN_IMAGE_THRESHOLD_ARTICLELIST_PAGE,
2525
downloadAndSaveModule,
26+
downloadAndSaveStartupModule,
27+
getModuleDependencies,
2628
genCanonicalLink,
2729
genHeaderCSSLink,
2830
genHeaderScript,
@@ -95,6 +97,8 @@ async function execute(argv: any) {
9597
publisher: _publisher,
9698
outputDirectory: _outputDirectory,
9799
addNamespaces: _addNamespaces,
100+
javaScript: _javaScript,
101+
addModules: _addModules,
98102
customZimFavicon,
99103
optimisationCacheUrl,
100104
customFlavour,
@@ -163,6 +167,10 @@ async function execute(argv: any) {
163167
MediaWiki.password = mwPassword
164168
MediaWiki.username = mwUsername
165169

170+
const javaScript = _javaScript || 'trusted'
171+
const addModules = _addModules ? String(_addModules).split(',') : []
172+
const trustedJs = javaScript === 'none' ? null : javaScript === 'trusted' ? config.output.mw.js_trusted.concat(addModules) : []
173+
166174
/* Download helpers; TODO: Merge with something else / expand this. */
167175
Downloader.init = {
168176
uaString: `${config.userAgent} (${adminEmail})`,
@@ -171,6 +179,7 @@ async function execute(argv: any) {
171179
optimisationCacheUrl,
172180
s3,
173181
webp,
182+
trustedJs,
174183
insecure: argv.insecure,
175184
}
176185

@@ -431,12 +440,48 @@ async function execute(argv: any) {
431440
const { jsModuleDependencies, cssModuleDependencies, staticFilesList } = await saveArticles(zimCreator, dump)
432441
logger.log(`Fetching Articles finished in ${(Date.now() - stime) / 1000} seconds`)
433442

434-
logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`)
435-
logger.log(`Found [${cssModuleDependencies.size}] style module dependencies`)
436-
437443
logger.info('Copying Static Resource Files')
438444
await saveStaticFiles(staticFilesList, zimCreator)
439445

446+
if (javaScript === 'none') {
447+
jsModuleDependencies.clear()
448+
} else if (RenderingContext.articlesRenderer.constructor.name === 'ActionParseRenderer') {
449+
// Get list of all possible modules from startup
450+
const allModules = await downloadAndSaveStartupModule(zimCreator)
451+
addModules.forEach((oneModule) => {
452+
jsModuleDependencies.add(oneModule)
453+
})
454+
// Include known dynamic dependencies
455+
const dynamicJsDeps = config.output.mw.js_dynamic_dependencies
456+
Object.keys(dynamicJsDeps).forEach((oneDep: keyof typeof dynamicJsDeps) => {
457+
if (jsModuleDependencies.has(oneDep)) {
458+
dynamicJsDeps[oneDep].forEach((extraDep) => {
459+
jsModuleDependencies.add(extraDep)
460+
})
461+
}
462+
})
463+
// Include all dependencies of the dependencies
464+
jsModuleDependencies.forEach((oneDep) => {
465+
const oneModule = allModules.find((oneModule) => oneModule[0] === oneDep)
466+
if (!oneModule) {
467+
jsModuleDependencies.delete(oneDep)
468+
return logger.warn(`Unknown JS module [${oneDep}] removed`)
469+
}
470+
getModuleDependencies(oneModule, allModules).forEach((extraDep) => {
471+
jsModuleDependencies.add(extraDep)
472+
})
473+
})
474+
// Don't store JS for CSS modules
475+
cssModuleDependencies.forEach((oneModule) => {
476+
if (!addModules.includes(oneModule)) {
477+
jsModuleDependencies.delete(oneModule)
478+
}
479+
})
480+
}
481+
482+
logger.log(`Found [${jsModuleDependencies.size}] js module dependencies`)
483+
logger.log(`Found [${cssModuleDependencies.size}] style module dependencies`)
484+
440485
const allDependenciesWithType = [
441486
{ type: 'js', moduleList: Array.from(jsModuleDependencies) },
442487
{ type: 'css', moduleList: Array.from(cssModuleDependencies) },
@@ -448,6 +493,9 @@ async function execute(argv: any) {
448493
return pmap(
449494
moduleList,
450495
(oneModule) => {
496+
if (oneModule.startsWith('user')) {
497+
return
498+
}
451499
return downloadAndSaveModule(zimCreator, oneModule, type as any)
452500
},
453501
{ concurrency: Downloader.speed },

src/parameterList.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ export const parameterDescriptions = {
3737
withoutZimFullTextIndex: "Don't include a fulltext search index to the ZIM",
3838
webp: 'Convert all jpeg, png and gif images to webp format',
3939
addNamespaces: 'Force additional namespace (comma separated numbers)',
40+
javaScript: 'Amount of JavaScript being allowed in pages, one of the following values can be given: "none", "trusted" or "all" (default being "trusted").',
41+
addModules: 'Add additional ResourceLoader modules for dynamic loading (comma separated list)',
4042
osTmpDir: 'Override default operating system temporary directory path environment variable',
4143
optimisationCacheUrl: 'Object Storage URL (including credentials and bucket name) to cache optimised media files',
4244
forceRender:

0 commit comments

Comments
 (0)