@@ -3,7 +3,6 @@ import { config } from './config.js'
33import { contains , normalizeMwResponse , DB_ERROR , WEAK_ETAG_REGEX , stripHttpFromUrl , isBitmapImageMimeType , isWebpCandidateImageMimeType } from './util/index.js'
44import { Readable } from 'stream'
55import deepmerge from 'deepmerge'
6- import * as domino from 'domino'
76import { default as imagemin } from 'imagemin'
87import imageminAdvPng from 'imagemin-advpng'
98import type { BackoffStrategy } from 'backoff'
@@ -31,6 +30,7 @@ import RestApiURLDirector from './util/builders/url/rest-api.director.js'
3130import { Renderer } from './renderers/abstract.renderer.js'
3231import { findFirstMatchingRule , renderDownloadError } from './error.manager.js'
3332import RedisStore from './RedisStore.js'
33+ import { extractJsConfigVars } from './util/articles.js'
3434
3535const imageminOptions = new Map ( )
3636imageminOptions . set ( 'default' , new Map ( ) )
@@ -59,6 +59,7 @@ interface DownloaderOpts {
5959 optimisationCacheUrl : string
6060 s3 ?: S3
6161 webp : boolean
62+ trustedJs ?: string [ ]
6263 backoffOptions ?: BackoffOptions
6364 insecure ?: boolean
6465}
@@ -123,6 +124,7 @@ class Downloader {
123124 private _arrayBufferRequestOptions : AxiosRequestConfig
124125 private _jsonRequestOptions : AxiosRequestConfig
125126 private _streamRequestOptions : AxiosRequestConfig
127+ public trustedJs : string [ ] = [ ]
126128 public wikimediaMobileJsDependenciesList : string [ ] = [ ]
127129 public wikimediaMobileStyleDependenciesList : string [ ] = [ ]
128130
@@ -163,13 +165,14 @@ class Downloader {
163165 return this . _apiUrlDirector
164166 }
165167
166- set init ( { uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, backoffOptions, insecure } : DownloaderOpts ) {
168+ set init ( { uaString, speed, reqTimeout, optimisationCacheUrl, s3, webp, trustedJs = config . output . mw . js_trusted . slice ( ) , backoffOptions, insecure } : DownloaderOpts ) {
167169 this . reset ( )
168170 this . uaString = uaString
169171 this . _speed = speed
170172 this . _requestTimeout = reqTimeout
171173 this . optimisationCacheUrl = optimisationCacheUrl
172174 this . _webp = webp
175+ this . trustedJs = trustedJs
173176 this . s3 = s3
174177 this . _apiUrlDirector = new ApiURLDirector ( MediaWiki . actionApiUrl . href )
175178 this . insecure = insecure
@@ -271,6 +274,7 @@ class Downloader {
271274 this . _requestTimeout = undefined
272275 this . optimisationCacheUrl = undefined
273276 this . _webp = false
277+ this . trustedJs = [ ]
274278 this . s3 = undefined
275279 this . _apiUrlDirector = undefined
276280 this . insecure = false
@@ -849,7 +853,7 @@ class Downloader {
849853
850854 /* If article is missing (for example because it just has been deleted) */
851855 if ( articleData . error . code === 'missingtitle' ) {
852- return { jsConfigVars : '' , jsDependenciesList : [ ] , styleDependenciesList : [ ] }
856+ return { jsConfigVars : { } , jsDependenciesList : [ ] , styleDependenciesList : [ ] }
853857 }
854858
855859 /* Something went wrong in modules retrieval at app level (no HTTP error) */
@@ -870,7 +874,7 @@ class Downloader {
870874 logger . info ( `Js dependencies of ${ title } : ${ jsDependenciesList } ` )
871875 logger . info ( `Css dependencies of ${ title } : ${ styleDependenciesList } ` )
872876
873- const jsConfigVars = Downloader . extractJsConfigVars ( headhtml )
877+ const jsConfigVars = extractJsConfigVars ( headhtml )
874878
875879 // Download mobile page dependencies only once
876880 if ( ( await MediaWiki . hasWikimediaMobileApi ( ) ) && this . wikimediaMobileJsDependenciesList . length === 0 && this . wikimediaMobileStyleDependenciesList . length === 0 ) {
@@ -904,27 +908,6 @@ class Downloader {
904908 stream . on ( 'end' , ( ) => resolve ( Buffer . concat ( chunks ) ) )
905909 } )
906910 }
907-
908- public static extractJsConfigVars ( headhtml : string ) {
909- let jsConfigVars = ''
910-
911- // Saving, as a js module, the jsconfigvars that are set in the header of a wikipedia page
912- // the script below extracts the config with a regex executed on the page header returned from the api
913- const scriptTags = domino . createDocument ( `${ headhtml } </body></html>` ) . getElementsByTagName ( 'script' )
914- const regex = / m w \. c o n f i g \. s e t \( \{ .* ?\} \) ; / gm
915- for ( let i = 0 ; i < scriptTags . length ; i += 1 ) {
916- if ( scriptTags [ i ] . text . includes ( 'mw.config.set' ) ) {
917- jsConfigVars = regex . exec ( scriptTags [ i ] . text ) [ 0 ] || ''
918- jsConfigVars = `(window.RLQ=window.RLQ||[]).push(function() {${ jsConfigVars } });`
919- } else if ( scriptTags [ i ] . text . includes ( 'RLCONF' ) || scriptTags [ i ] . text . includes ( 'RLSTATE' ) || scriptTags [ i ] . text . includes ( 'RLPAGEMODULES' ) ) {
920- jsConfigVars = scriptTags [ i ] . text
921- }
922- }
923-
924- jsConfigVars = jsConfigVars . replace ( 'nosuchaction' , 'view' ) // to replace the wgAction config that is set to 'nosuchaction' from api but should be 'view'
925-
926- return jsConfigVars
927- }
928911}
929912
930913export { Downloader as DownloaderClass }
0 commit comments