Skip to content

Commit 5d3218c

Browse files
GANReviewTool: TemplateFinder (#269)
* GANReviewTool: TemplateFinder * GANReviewTool: remove unused code * move constructor to the top of the class * remove old comment * add some tests * fix * fix * fix * fix * fix linter errors * switch back to ?? * add some tests demonstrating the power of the new library * Rename 'root' to 'wikiPage' in TemplateFinder * Add comment about Wikipedia extension tags Added a comment to clarify the parser's configuration. * fix tests * these look like dev dependencies * remove comma * remove comment * Use esbuild to bundle JS files * Update publish.php --------- Co-authored-by: NovemLinguae <79697282+NovemLinguae@users.noreply.github.com> Co-authored-by: NovemLinguae <novemlinguae@gmail.com>
1 parent 39d4214 commit 5d3218c

11 files changed

+991
-316
lines changed

GANReviewTool/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/dist/

GANReviewTool/modules/GANReviewWikicodeGenerator.js

Lines changed: 15 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/* eslint-disable indent */
2+
import { TemplateFinder } from './TemplateFinder.js';
23

34
export class GANReviewWikicodeGenerator {
45
getPassWikicodeForGANPage( reviewWikicode ) {
@@ -255,11 +256,12 @@ export class GANReviewWikicodeGenerator {
255256
}
256257

257258
getFirstTemplateNameFromWikicode( wikicode ) {
258-
const match = wikicode.match( /(?<=\{\{)[^|}]+/ );
259-
if ( !match ) {
259+
const templateFinder = new TemplateFinder( wikicode );
260+
const template = templateFinder.firstTemplate();
261+
if ( !template ) {
260262
throw new Error( 'getFirstTemplateNameFromWikicode: No template found in Wikicode.' );
261263
}
262-
return match[ 0 ];
264+
return TemplateFinder.removePrefix( template.name );
263265
}
264266

265267
/**
@@ -350,7 +352,7 @@ export class GANReviewWikicodeGenerator {
350352
const topicString = `\n|topic = ${ topic }`;
351353

352354
// https://en.wikipedia.org/wiki/Template:Article_history#How_to_use_in_practice
353-
const existingStatus = this.firstTemplateGetParameterValue( talkWikicode, 'Artricle history', 'currentstatus' );
355+
const existingStatus = this.firstTemplateGetParameterValue( talkWikicode, 'Article ?history', 'currentstatus' );
354356
talkWikicode = this.firstTemplateDeleteParameter( talkWikicode, 'Article ?history', 'currentstatus' );
355357
const currentStatusString = this.getArticleHistoryNewStatus( existingStatus, listedOrFailed );
356358

@@ -393,84 +395,20 @@ export class GANReviewWikicodeGenerator {
393395
}
394396

395397
firstTemplateInsertCode( wikicode, templateNameRegExNoDelimiters, codeToInsert ) {
396-
// TODO: handle nested templates
397-
const regex = new RegExp( `(\\{\\{${ templateNameRegExNoDelimiters }[^\\}]*)(\\}\\})`, 'i' );
398-
return wikicode.replace( regex, `$1\n${ codeToInsert }\n$2` );
399-
}
400-
401-
firstTemplateGetParameterValue( wikicode, template, parameter ) {
402-
// TODO: rewrite to be more robust. currently using a simple algorithm that is prone to failure
403-
// new algorithm:
404-
// find start of template. use regex /i (ignore case)
405-
// iterate using loops until end of template found
406-
// handle <nowiki>
407-
// handle triple {{{
408-
// handle nested
409-
410-
const regex = new RegExp( `\\|\\s*${ parameter }\\s*=\\s*([^\\n\\|\\}]*)\\s*`, '' );
411-
const result = wikicode.match( regex );
412-
if ( wikicode.match( regex ) === null ) {
413-
return null;
414-
}
415-
return result[ 1 ];
398+
const templateFinder = new TemplateFinder( wikicode );
399+
templateFinder.firstTemplateInsertCode( templateNameRegExNoDelimiters, codeToInsert );
400+
return templateFinder.getWikitext();
416401
}
417402

418-
/**
419-
* @param {RegExp} regex
420-
*/
421-
preg_position( regex, haystack ) {
422-
const matches = [ ...haystack.matchAll( regex ) ];
423-
const hasMatches = matches.length;
424-
if ( hasMatches ) {
425-
return matches[ 0 ].index;
426-
}
427-
return false;
428-
}
429-
430-
findEndOfTemplate( wikicode, templateStartPosition ) {
431-
// TODO: handle triple braces, handle <nowiki> tags
432-
let nesting = 0;
433-
let templateEndPosition = -1;
434-
// +1 to skip the first {{, will throw off our nesting count
435-
for ( let i = templateStartPosition + 1; i < wikicode.length; i++ ) {
436-
const nextTwoChars = wikicode.slice( i, i + 2 );
437-
if ( nextTwoChars === '{{' ) {
438-
nesting++;
439-
continue;
440-
} else if ( nextTwoChars === '}}' ) {
441-
if ( nesting > 0 ) {
442-
nesting--;
443-
continue;
444-
} else {
445-
templateEndPosition = i + 2;
446-
break;
447-
}
448-
}
449-
}
450-
return templateEndPosition;
403+
firstTemplateGetParameterValue( wikicode, templateRegEx, parameter ) {
404+
const templateFinder = new TemplateFinder( wikicode );
405+
return templateFinder.firstTemplateGetParameterValue( templateRegEx, parameter );
451406
}
452407

453408
firstTemplateDeleteParameter( wikicode, templateRegEx, parameter ) {
454-
// templateStartPosition
455-
const regex = new RegExp( '{{' + templateRegEx, 'gi' );
456-
const templateStartPosition = this.preg_position( regex, wikicode );
457-
458-
// templateEndPosition
459-
const templateEndPosition = this.findEndOfTemplate( wikicode, templateStartPosition );
460-
461-
// slice
462-
const firstPiece = wikicode.slice( 0, templateStartPosition );
463-
let secondPiece = wikicode.slice( templateStartPosition, templateEndPosition );
464-
const thirdPiece = wikicode.slice( templateEndPosition );
465-
466-
// replace only inside the slice
467-
const regex2 = new RegExp( `\\|\\s*${ parameter }\\s*=\\s*([^\\n\\|\\}]*)\\s*`, '' );
468-
secondPiece = secondPiece.replace( regex2, '' );
469-
470-
// glue back together
471-
wikicode = firstPiece + secondPiece + thirdPiece;
472-
473-
return wikicode;
409+
const templateFinder = new TemplateFinder( wikicode );
410+
templateFinder.firstTemplateDeleteParameter( templateRegEx, parameter );
411+
return templateFinder.getWikitext();
474412
}
475413

476414
removeFormattingThatInterferesWithSort( str ) {

GANReviewTool/modules/GARCloserWikicodeGenerator.js

Lines changed: 14 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { TemplateFinder } from './TemplateFinder.js';
2+
13
export class GARCloserWikicodeGenerator {
24
processKeepForGARPage( garPageWikicode, message, isCommunityAssessment ) {
35
return this.processGARPage( garPageWikicode, message, isCommunityAssessment, 'Kept.', 'green' );
@@ -583,15 +585,15 @@ __TOC__`;
583585
throw new Error( 'InvalidArgumentException' );
584586
}
585587

586-
const topic = this.firstTemplateGetParameterValue( wikicode, 'Artricle history', 'topic' );
588+
const topic = this.firstTemplateGetParameterValue( wikicode, 'Article ?history', 'topic' );
587589
let topicString = '';
588590
if ( !topic ) {
589591
topicString = `\n|topic = ${ topic }`;
590592
}
591593

592594
// https://en.wikipedia.org/wiki/Template:Article_history#How_to_use_in_practice
593-
const existingStatus = this.firstTemplateGetParameterValue( wikicode, 'Artricle history', 'currentstatus' );
594-
wikicode = this.firstTemplateDeleteParameter( wikicode, 'Article history', 'currentstatus' );
595+
const existingStatus = this.firstTemplateGetParameterValue( wikicode, 'Article ?history', 'currentstatus' );
596+
wikicode = this.firstTemplateDeleteParameter( wikicode, 'Article ?history', 'currentstatus' );
595597
const currentStatusString = this.getArticleHistoryNewStatus( existingStatus, keepOrDelist );
596598

597599
const result = this.getKeepOrDelistPastTense( keepOrDelist );
@@ -634,21 +636,9 @@ __TOC__`;
634636
}
635637
}
636638

637-
firstTemplateGetParameterValue( wikicode, template, parameter ) {
638-
// TODO: rewrite to be more robust. currently using a simple algorithm that is prone to failure
639-
// new algorithm:
640-
// find start of template. use regex /i (ignore case)
641-
// iterate using loops until end of template found
642-
// handle <nowiki>
643-
// handle triple {{{
644-
// handle nested
645-
646-
const regex = new RegExp( `\\|\\s*${ parameter }\\s*=\\s*([^\\n\\|\\}]*)\\s*`, '' );
647-
const result = wikicode.match( regex );
648-
if ( wikicode.match( regex ) === null ) {
649-
return null;
650-
}
651-
return result[ 1 ];
639+
firstTemplateGetParameterValue( wikicode, templateRegEx, parameter ) {
640+
const templateFinder = new TemplateFinder( wikicode );
641+
return templateFinder.firstTemplateGetParameterValue( templateRegEx, parameter );
652642
}
653643

654644
getArticleHistoryNewStatus( existingStatus, keepOrDelist ) {
@@ -663,63 +653,18 @@ __TOC__`;
663653
* @param {Array} templateNameArrayCaseInsensitive
664654
*/
665655
firstTemplateInsertCode( wikicode, templateNameArrayCaseInsensitive, codeToInsert ) {
666-
for ( const templateName of templateNameArrayCaseInsensitive ) {
667-
const strPosOfEndOfFirstTemplate = this.getStrPosOfEndOfFirstTemplateFound( wikicode, templateName );
668-
if ( strPosOfEndOfFirstTemplate !== null ) {
669-
const insertPosition = strPosOfEndOfFirstTemplate - 2; // 2 characters from the end, right before }}
670-
const result = this.insertStringIntoStringAtPosition( wikicode, `\n${ codeToInsert }\n`, insertPosition );
671-
return result;
672-
}
673-
}
674-
}
675-
676-
/**
677-
* CC BY-SA 4.0, jAndy, https://stackoverflow.com/a/4364902/3480193
678-
*/
679-
insertStringIntoStringAtPosition( bigString, insertString, position ) {
680-
return [
681-
bigString.slice( 0, position ),
682-
insertString,
683-
bigString.slice( position )
684-
].join( '' );
685-
}
686-
687-
/**
688-
* Grabs string position of the END of first {{template}} contained in wikicode. Case insensitive. Returns null if no template found. Handles nested templates.
689-
*
690-
* @return {number|null}
691-
*/
692-
getStrPosOfEndOfFirstTemplateFound( wikicode, templateName ) {
693-
const starting_position = wikicode.toLowerCase().indexOf( '{{' + templateName.toLowerCase() );
694-
if ( starting_position === -1 ) {
695-
return null;
696-
}
697-
let counter = 0;
698-
const length = wikicode.length;
699-
for ( let i = starting_position + 2; i < length; i++ ) {
700-
const next_two = wikicode.substr( i, 2 );
701-
if ( next_two == '{{' ) {
702-
counter++;
703-
continue;
704-
} else if ( next_two == '}}' ) {
705-
if ( counter == 0 ) {
706-
return i + 2; // +2 to account for next_two being }} (2 characters)
707-
} else {
708-
counter--;
709-
continue;
710-
}
711-
}
712-
}
713-
return null;
656+
const templateFinder = new TemplateFinder( wikicode );
657+
templateFinder.firstTemplateInsertCode( templateNameArrayCaseInsensitive, codeToInsert );
658+
return templateFinder.getWikitext();
714659
}
715660

716661
removeGAStatusFromWikiprojectBanners( wikicode ) {
717662
return wikicode.replace( /(\|\s*class\s*=\s*)([^}|\s]*)/gi, '$1' );
718663
}
719664

720665
firstTemplateDeleteParameter( wikicode, template, parameter ) {
721-
// TODO: rewrite to be more robust. currently using a simple algorithm that is prone to failure
722-
const regex = new RegExp( `\\|\\s*${ parameter }\\s*=\\s*([^\\n\\|\\}]*)\\s*`, '' );
723-
return wikicode.replace( regex, '' );
666+
const templateFinder = new TemplateFinder( wikicode );
667+
templateFinder.firstTemplateDeleteParameter( template, parameter );
668+
return templateFinder.getWikitext();
724669
}
725670
}

GANReviewTool/modules/Parser.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import Parser from 'wikiparser-template';
2+
3+
// The parser needs to know all extension tags enabled on Wikipedia
4+
Parser.config = {
5+
ext: [
6+
'pre',
7+
'nowiki',
8+
'gallery',
9+
'indicator',
10+
'langconvert',
11+
'graph',
12+
'timeline',
13+
'hiero',
14+
'charinsert',
15+
'ref',
16+
'references',
17+
'inputbox',
18+
'imagemap',
19+
'source',
20+
'syntaxhighlight',
21+
'poem',
22+
'categorytree',
23+
'section',
24+
'score',
25+
'templatestyles',
26+
'templatedata',
27+
'math',
28+
'ce',
29+
'chem',
30+
'maplink',
31+
'mapframe',
32+
'page-collection',
33+
'phonos'
34+
]
35+
};
36+
37+
export default Parser;
Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,57 @@
1-
// TODO: A couple of recent bugs will require a lexer or template parser type class to solve.
1+
import Parser from './Parser.js';
22

3-
class TemplateFinder {
4-
// getTemplateList()
5-
// appendParameter()
6-
// addWikitextAfterTemplate()
7-
// getWikitext()
3+
/**
 * Locates templates inside a parsed wikitext page and reads or edits their
 * parameters. All edits are applied to the parse tree held in `this.wikiPage`;
 * call getWikitext() to serialise the result.
 */
export class TemplateFinder {
	/**
	 * @param {string} wikicode Wikitext to parse.
	 */
	constructor( wikicode ) {
		// NOTE(review): the 2nd/3rd arguments are presumably the parser's "include" flag
		// and maximum parse stage - confirm against the wikiparser documentation.
		this.wikiPage = Parser.parse( wikicode, false, 2 );
	}

	/**
	 * Strip a leading "Template:" namespace prefix from a template name.
	 *
	 * @param {string} templateName
	 * @return {string} The name without the leading "Template:" (unchanged if absent).
	 */
	static removePrefix( templateName ) {
		return templateName.replace( /^Template:/, '' );
	}

	/**
	 * @return {string} The string form of the (possibly modified) parse tree.
	 */
	getWikitext() {
		return String( this.wikiPage );
	}

	/**
	 * Find the first template on the page, optionally restricted by name.
	 *
	 * @param {string|string[]} [templateNameRegExOrArrayCaseInsensitive] Either a regular
	 *   expression source (no delimiters, no "Template:" prefix) that must match the whole
	 *   template name, or an array of literal template names. Both are case insensitive.
	 *   When omitted or falsy, every template matches.
	 * @return {Object|undefined} The first matching template node, or the result of
	 *   `.find()` when nothing matches (undefined for an array).
	 */
	firstTemplate( templateNameRegExOrArrayCaseInsensitive ) {
		let filter;
		if ( !templateNameRegExOrArrayCaseInsensitive ) {
			filter = () => true;
		} else if ( Array.isArray( templateNameRegExOrArrayCaseInsensitive ) ) {
			// Candidate names are lower-cased and written with underscores before being
			// compared with the lower-cased, prefix-less template name.
			const templateNameArray = templateNameRegExOrArrayCaseInsensitive
				.map( ( name ) => name.toLowerCase().replace( /\s/g, '_' ) );
			filter = ( { name } ) => templateNameArray.includes( TemplateFinder.removePrefix( name ).toLowerCase() );
		} else {
			// Wrap the caller's pattern in a non-capturing group. Without it, a pattern
			// containing alternation such as "foo|bar" would be parsed as
			// /^Template:foo|bar$/ and the anchors would each apply to one branch only.
			const regEx = new RegExp( `^Template:(?:${ templateNameRegExOrArrayCaseInsensitive })$`, 'i' );
			// Underscores are turned into spaces so patterns can be written with spaces.
			filter = ( { name } ) => regEx.test( name.replace( /_/g, ' ' ) );
		}
		return this.wikiPage.querySelectorAll( 'template' ).find( filter );
	}

	/**
	 * Append code to the first matching template. Does nothing if no template matches.
	 *
	 * @param {string|string[]} templateNameRegExOrArrayCaseInsensitive See firstTemplate().
	 * @param {string} codeToInsert Code to append; one leading "|" is stripped and a
	 *   trailing newline is added before it is passed to the template's append().
	 */
	firstTemplateInsertCode( templateNameRegExOrArrayCaseInsensitive, codeToInsert ) {
		const template = this.firstTemplate( templateNameRegExOrArrayCaseInsensitive );
		if ( template ) {
			template.append( `${ codeToInsert.replace( /^\|/, '' ) }\n` );
		}
	}

	/**
	 * Read a parameter value from the first matching template.
	 *
	 * @param {string|string[]} templateNameRegExOrArrayCaseInsensitive See firstTemplate().
	 * @param {string} parameter Parameter name, passed as-is to the template's getValue().
	 * @return {string|null} The value, or null when the template is not found or
	 *   getValue() returns undefined.
	 */
	firstTemplateGetParameterValue( templateNameRegExOrArrayCaseInsensitive, parameter ) {
		const template = this.firstTemplate( templateNameRegExOrArrayCaseInsensitive );
		if ( !template ) {
			return null;
		}
		const value = template.getValue( parameter );
		return value === undefined ? null : value;
	}

	/**
	 * Remove every argument of the first matching template whose name equals
	 * `parameter`, ignoring case. Does nothing if no template matches.
	 *
	 * @param {string|string[]} templateNameRegExOrArrayCaseInsensitive See firstTemplate().
	 * @param {string} parameter Parameter name to delete.
	 */
	firstTemplateDeleteParameter( templateNameRegExOrArrayCaseInsensitive, parameter ) {
		const template = this.firstTemplate( templateNameRegExOrArrayCaseInsensitive );
		if ( !template ) {
			return;
		}
		// Lower-case both sides: the argument name is lower-cased below, so a caller
		// passing e.g. "CurrentStatus" would otherwise never match anything.
		const wantedName = String( parameter ).toLowerCase();
		for ( const token of template.getAllArgs() ) {
			if ( token.name.toLowerCase() === wantedName ) {
				token.remove();
			}
		}
	}
}

0 commit comments

Comments
 (0)