Skip to content

Commit 25cafeb

Browse files
committed
Merge branch 'bugfix/case-sensitivity-of-tags'
2 parents 2dca884 + 7a52457 commit 25cafeb

File tree

11 files changed

+193
-30
lines changed

11 files changed

+193
-30
lines changed

integration-tests/test-generic.mediawiki

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,18 +88,18 @@ All (raster) images will be scaled down and turned into grayscale images. SVGs s
8888
8989
=== Galleries ===
9090
91-
<gallery>
91+
<GALLery>
9292
Wikimedia_Servers-0051_19.jpg|With some caption.
9393
Datei:Wikipedia-logo-v2.svg
94-
</gallery>
94+
</gallERY>
9595
9696
=== Image maps ===
9797
98-
<imagemap>
98+
<IMAGEmap>
9999
Image:Wikimedia_Servers-0051_19.jpg|With some caption
100100
rect 1 1 741 445 [[Oregon|Oregon]]
101101
desc none
102-
</imagemap>
102+
</imageMAP>
103103
104104
== Tables ==
105105
@@ -130,9 +130,9 @@ A bit tricky but they work as well:
130130
131131
== References ==
132132
133-
They will be collected<ref name="another-ref">This is true</ref> and are visible<ref>Some reference</ref> at the end of the document:<ref name="another-ref" /><br>
133+
They will be collected<ref name="another-ref">This is true</ref> and are visible<REF>Some reference</ref> at the end of the document:<REF name="another-ref" /><br>
134134
135-
There are also grouped references possible.<ref group="some-group">Some grouped ref</ref><br>
135+
There are also grouped references possible.<ref group="some-group">Some grouped ref</REF><br>
136136
Even named<ref group="some-group" name="grouped-ref-name"/> grouped references work!<ref group="some-group" name="grouped-ref-name">Some named grouped ref</ref><br>
137137
138138
Normal refs:<br>
@@ -158,7 +158,7 @@ Grouped refs:<br>
158158
Some text has the <nowiki>keyword</nowiki>.
159159
160160
It can also be used in the following ways:
161-
* <nowiki>in lists</nowiki>
161+
* <NOwiki>in lists</noWIKI>
162162
* also with <nowiki>lists</nowiki> containing text
163163
164164
Also in comments: <!-- this <nowiki>part</nowiki> should not be visible -->

src/parser/image.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,16 +93,16 @@ func (t *Tokenizer) parseGalleries(content string) string {
9393
trimmedLine := strings.TrimSpace(line)
9494

9595
// Gallery ends -> Simply remove line and end "withinGallery" mode
96-
if strings.HasPrefix(trimmedLine, "</gallery>") {
96+
if util.HasPrefixIgnoreCase(trimmedLine, "</gallery>") {
9797
withinGallery = false
9898

99-
if trimmedLine == "</gallery>" {
99+
if util.EqualsIgnoreCase(trimmedLine, "</gallery>") {
100100
// This line just contains the tag -> ignore it and proceed with parsing
101101
continue
102102
}
103103

104104
// If the line contains more than the closing tag -> Keep it and proceed with the processing
105-
line = strings.ReplaceAll(line, "</gallery>", "")
105+
line = util.ReplaceAllIgnoreCase(line, "</gallery>", "")
106106
} else if galleryStartRegex.MatchString(trimmedLine) {
107107
withinGallery = true
108108

@@ -159,13 +159,13 @@ func (t *Tokenizer) parseImageMaps(content string) string {
159159
line := lines[i]
160160

161161
// Delete uninteresting lines (end of map or all the polygon-map-stuff in between)
162-
if withinImageMap || line == "</imagemap>" {
162+
if withinImageMap || util.EqualsIgnoreCase(line, "</imagemap>") {
163163
// delete this line i
164164
lines = append(lines[:i], lines[i+1:]...)
165165
i--
166166

167167
// Imagemap ends -> end "withinImageMap" mode
168-
if line == "</imagemap>" {
168+
if util.EqualsIgnoreCase(line, "</imagemap>") {
169169
withinImageMap = false
170170
}
171171

src/parser/image_test.go

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ func TestEscapeImages_leadingSpecialChar(t *testing.T) {
110110
func TestParseGalleries(t *testing.T) {
111111
tokenizer := NewTokenizerWithMockWikipediaService()
112112
content := tokenizer.parseGalleries(`foo
113-
<gallery>file0.jpg
113+
<gallery>file0.jpg|thumb
114114
file:file1.jpg|captiion
115115
</gallery>
116116
bar
@@ -120,7 +120,7 @@ File:file2.jpg|test123
120120
</gallery>blubb`)
121121

122122
test.AssertEqual(t, `foo
123-
[[File:File0.jpg|mini]]
123+
[[File:File0.jpg|thumb]]
124124
[[file:File1.jpg|mini|captiion]]
125125
bar
126126
[[File:File2.jpg|mini|test123]]
@@ -130,6 +130,29 @@ blubb`, content)
130130
test.AssertMapEqual(t, map[string]Token{}, tokenizer.getTokenMap())
131131
}
132132

133+
func TestParseGalleries_caseInsensitivity(t *testing.T) {
134+
tokenizer := NewTokenizerWithMockWikipediaService()
135+
content := tokenizer.parseGalleries(`foo
136+
<GALLERY>file0.jpg|thumb
137+
file:file1.jpg|captiion
138+
</gallery>
139+
bar
140+
<gallery some="parameter">
141+
File:file2.jpg|test123|THUMB
142+
file 3.jpg
143+
</GALLERY>blubb`)
144+
145+
test.AssertEqual(t, `foo
146+
[[File:File0.jpg|thumb]]
147+
[[file:File1.jpg|mini|captiion]]
148+
bar
149+
[[File:File2.jpg|test123|THUMB]]
150+
[[File:File_3.jpg|mini]]
151+
blubb`, content)
152+
153+
test.AssertMapEqual(t, map[string]Token{}, tokenizer.getTokenMap())
154+
}
155+
133156
func TestParseGalleries_emptyGallery(t *testing.T) {
134157
tokenizer := NewTokenizerWithMockWikipediaService()
135158
content := tokenizer.parseGalleries(`foo
@@ -166,6 +189,29 @@ blubb`, content)
166189
test.AssertMapEqual(t, map[string]Token{}, tokenizer.getTokenMap())
167190
}
168191

192+
func TestParseImagemaps_caseInsensitivity(t *testing.T) {
193+
tokenizer := NewTokenizerWithMockWikipediaService()
194+
content := tokenizer.parseImageMaps(`foo
195+
<IMAGEMAP>File:picture.jpg
196+
some
197+
stuff
198+
</imagemap>
199+
bar
200+
<imagemap some="parameter">
201+
Image:picture.jpg
202+
some stuff
203+
</IMAGEMAP>
204+
blubb`)
205+
206+
test.AssertEqual(t, `foo
207+
[[File:Picture.jpg]]
208+
bar
209+
[[Image:Picture.jpg]]
210+
blubb`, content)
211+
212+
test.AssertMapEqual(t, map[string]Token{}, tokenizer.getTokenMap())
213+
}
214+
169215
func TestParseImages_inlineHappyPath(t *testing.T) {
170216
setup()
171217
tokenizer := NewTokenizerWithMockWikipediaService()

src/parser/nowiki.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package parser
22

3+
import "wiki2book/util"
4+
35
type NowikiToken struct {
46
Token
57
Content string
@@ -19,8 +21,8 @@ func (t *Tokenizer) parseNowiki(content string) string {
1921
for i := 0; i < len(content)-nowikiEndLen; i++ {
2022
cursor := content[i : i+nowikiStartLen]
2123

22-
if cursor == nowikiStart {
23-
endIndex := FindCorrespondingCloseToken(content, i+nowikiStartLen, nowikiStart, nowikiEnd)
24+
if util.EqualsIgnoreCase(cursor, nowikiStart) {
25+
endIndex := FindCorrespondingCloseTokenIgnoreCase(content, i+nowikiStartLen, nowikiStart, nowikiEnd)
2426

2527
token := NowikiToken{
2628
Content: content[i+nowikiStartLen : endIndex],

src/parser/nowiki_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@ func TestNowiki(t *testing.T) {
1919
}, tokenizer.getTokenMap())
2020
}
2121

22+
func TestNowiki_caseInsensitivity(t *testing.T) {
23+
tokenizer := NewTokenizerWithMockWikipediaService()
24+
content := "Foo<NOWIKI>something</nowiki> bar <nowiki>something else</NOWIKI> blubb"
25+
expectedContent := "Foo" + fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_NOWIKI, 0) + " bar " + fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_NOWIKI, 1) + " blubb"
26+
27+
newContent := tokenizer.parseNowiki(content)
28+
29+
test.AssertEqual(t, expectedContent, newContent)
30+
test.AssertMapEqual(t, map[string]Token{
31+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_NOWIKI, 0): NowikiToken{Content: "something"},
32+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_NOWIKI, 1): NowikiToken{Content: "something else"},
33+
}, tokenizer.getTokenMap())
34+
}
35+
2236
func TestNowiki_endOfText(t *testing.T) {
2337
tokenizer := NewTokenizerWithMockWikipediaService()
2438
content := "Foo<nowiki>something</nowiki>"

src/parser/pattern.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ var (
4040

4141
// Media files.
var (
	// (?i) makes the matching case-insensitive, since wikitext accepts e.g. <GALLERY> as well.
	galleryStartRegex          = regexp.MustCompile(`(?i)^<gallery.*?>`)
	imagemapStartRegex         = regexp.MustCompile(`(?i)^<imagemap.*?>`)
	hasNonInlineParameterRegex = regexp.MustCompile("(?i)(" + strings.Join(imageNonInlineParameters, "|") + ")")
)
4747

4848
// Tables
@@ -54,9 +54,9 @@ var (
5454

5555
// References. The (?i) flag makes tag matching case-insensitive, since wikitext accepts e.g. <REF> as well.
var (
	referencePlaceholderShortRegex = regexp.MustCompile(`(?i)<references.*?/\s*>`) // <references />
	referencePlaceholderStartRegex = regexp.MustCompile(`(?i)<references.*?\s*>`)  // <references group="foo" >
	referencePlaceholderEndRegex   = regexp.MustCompile(`(?i)</references\s*>`)    // </references>
)
6161

6262
// Math

src/parser/reference.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,12 @@ func (t *Tokenizer) parseReferences(content string) string {
6161

6262
for i := 0; i < len(content)-refDefStartLen; i++ {
6363
cursor := content[i : i+refDefStartLen]
64-
if cursor != refDefStart && cursor != refPlaceholderEnd {
64+
if !util.EqualsIgnoreCase(cursor, refDefStart) && !util.EqualsIgnoreCase(cursor, refPlaceholderEnd) {
6565
// Cursor is not on the beginning of any reference related tag.
6666
continue
6767
}
6868

69-
startEndIndex := FindCorrespondingCloseToken(content, i+refDefStartLen, refDefStart, xmlClosing)
69+
startEndIndex := FindCorrespondingCloseTokenIgnoreCase(content, i+refDefStartLen, refDefStart, xmlClosing)
7070
if startEndIndex == -1 {
7171
// XML for <ref not closed -> broken wikitext
7272
sigolo.Errorf("XML element for reference start '%s' not closed (i.e. missing '%s'). Text around this location: ...%s...", refDefStart, xmlClosing, util.GetTextAround(content, i, 50))
@@ -117,7 +117,7 @@ func (t *Tokenizer) parseReferences(content string) string {
117117
refNumberCounterForCurrentGroup, content = t.parseNamedReferenceUsage(content, i, nameAttributeValue, nameToRefNumberForCurrentGroup, refNumberCounterForCurrentGroup, cursorWithinReferencePlaceholder, startEndIndex)
118118
} else {
119119
// Reference definition like "<ref name=...>Foobar</ref".
120-
refEndIndex := FindCorrespondingCloseToken(content, startEndIndex, refDefStart, refDefLongEnd)
120+
refEndIndex := FindCorrespondingCloseTokenIgnoreCase(content, startEndIndex, refDefStart, refDefLongEnd)
121121
if refEndIndex == -1 {
122122
// No end token found -> probably unsupported wikitext syntax (like nested refs)
123123
sigolo.Errorf("No end-part for reference start '%s' found. Text around this location: ...%s...", refDefStart, util.GetTextAround(content, i, 50))

src/parser/reference_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,34 @@ some footer`
3434
}, tokenizer.getTokenMap())
3535
}
3636

37+
func TestParseReferences_caseInsensitivity(t *testing.T) {
38+
tokenizer := NewTokenizerWithMockWikipediaService()
39+
content := `some text<REF>bar</ref>
40+
some<ref name="blubb">blubbeldy</REF> other<REF name="fooref" /> text
41+
<ReferenCES responsive>
42+
<REF name="fooref">foo</REF>
43+
</REFerENces>
44+
some footer`
45+
expectedContent := "some text" + fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_USAGE, 0) + "\n" +
46+
"some" + fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_USAGE, 1) + " other" + fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_USAGE, 2) + " text\n" +
47+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_DEF, 3) + "\n" +
48+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_DEF, 4) + "\n" +
49+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_DEF, 5) + "\n" +
50+
"some footer"
51+
52+
newContent := tokenizer.parseReferences(content)
53+
54+
test.AssertEqual(t, expectedContent, newContent)
55+
test.AssertMapEqual(t, map[string]Token{
56+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_USAGE, 0): RefUsageToken{Index: 0},
57+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_USAGE, 1): RefUsageToken{Index: 1},
58+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_USAGE, 2): RefUsageToken{Index: 2},
59+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_DEF, 3): RefDefinitionToken{Index: 0, Content: "bar"},
60+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_DEF, 4): RefDefinitionToken{Index: 1, Content: "blubbeldy"},
61+
fmt.Sprintf(TOKEN_TEMPLATE, TOKEN_REF_DEF, 5): RefDefinitionToken{Index: 2, Content: "foo"},
62+
}, tokenizer.getTokenMap())
63+
}
64+
3765
func TestParseReferences_tokenizeRefContent(t *testing.T) {
3866
tokenizer := NewTokenizerWithMockWikipediaService()
3967
content := `some text<ref>foo [[bar|Bar]]</ref>.`

src/parser/util.go

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,25 @@
11
package parser
22

3-
// FindCorrespondingCloseToken determines the index on which the given openingToken at the startIndex is closed.
3+
import "wiki2book/util"
4+
5+
// FindCorrespondingCloseToken determines the index at which the given openingToken found at startIndex is closed,
// taking nesting of the tokens into account. It returns -1 if no matching closing token has been found. Token
// comparison is case-sensitive.
func FindCorrespondingCloseToken(content string, startIndex int, openingToken string, closingToken string) int {
	return findCorrespondingCloseToken(content, startIndex, openingToken, closingToken, false)
}

// FindCorrespondingCloseTokenIgnoreCase behaves like FindCorrespondingCloseToken but compares the tokens
// case-insensitively. It returns -1 if no matching closing token has been found.
func FindCorrespondingCloseTokenIgnoreCase(content string, startIndex int, openingToken string, closingToken string) int {
	return findCorrespondingCloseToken(content, startIndex, openingToken, closingToken, true)
}
16+
17+
// findCorrespondingCloseToken determines the index on which the given openingToken at the startIndex is closed. If the
18+
// closing token has not been found, -1 is returned.
19+
func findCorrespondingCloseToken(content string, startIndex int, openingToken string, closingToken string, ignoreCase bool) int {
20+
// Used as a primitive stack to count the degree of nesting the cursor is in. Every opening token increments the
21+
// counter, every closing token decrements it. If a closing token has been found and the nesting degree is 0, then
22+
// the correct closing token has been found.
723
closeTokenCounter := 0
824

925
// The tokens are considered to be of equal size
@@ -23,12 +39,30 @@ func FindCorrespondingCloseToken(content string, startIndex int, openingToken st
2339
cursorClosingToken = content[i : i+closingTokenSize]
2440
}
2541

26-
if openingToken != closingToken && cursorOpeningToken == openingToken {
42+
openingAndClosingTokenAreDifferent := false
43+
cursorIsOnOpeningToken := false
44+
if ignoreCase {
45+
openingAndClosingTokenAreDifferent = !util.EqualsIgnoreCase(openingToken, closingToken)
46+
cursorIsOnOpeningToken = util.EqualsIgnoreCase(cursorOpeningToken, openingToken)
47+
} else {
48+
openingAndClosingTokenAreDifferent = openingToken != closingToken
49+
cursorIsOnOpeningToken = cursorOpeningToken == openingToken
50+
}
51+
52+
cursorIsOnClosingToken := false
53+
if ignoreCase {
54+
cursorIsOnClosingToken = util.EqualsIgnoreCase(cursorClosingToken, closingToken)
55+
} else {
56+
cursorIsOnClosingToken = cursorClosingToken == closingToken
57+
}
58+
59+
foundNewOpeningToken := openingAndClosingTokenAreDifferent && cursorIsOnOpeningToken
60+
if foundNewOpeningToken {
2761
closeTokenCounter++
2862

2963
// Skip the found opening token. Use the "-1" to compensate the "+1" by the loop
3064
i += openingTokenSize - 1
31-
} else if cursorClosingToken == closingToken {
65+
} else if cursorIsOnClosingToken {
3266
if closeTokenCounter == 0 {
3367
return i
3468
} else {

src/parser/util_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,28 @@ func TestFindCorrespondingCloseToken_equalStartAndEndToken(t *testing.T) {
103103
index = FindCorrespondingCloseToken("abc$$defghi$$", 5, "$$", "$$")
104104
test.AssertEqual(t, 11, index)
105105
}
106+
107+
func TestFindCorrespondingCloseTokenIgnoreCase(t *testing.T) {
108+
var index int
109+
110+
index = FindCorrespondingCloseTokenIgnoreCase("abcfoodefbarghbari", 0, "foo", "bar")
111+
test.AssertEqual(t, 14, index)
112+
113+
index = FindCorrespondingCloseTokenIgnoreCase("abcFOOdefbarghbari", 0, "foo", "bar")
114+
test.AssertEqual(t, 14, index)
115+
116+
index = FindCorrespondingCloseTokenIgnoreCase("abcfoodefBARghBARi", 0, "foo", "bar")
117+
test.AssertEqual(t, 14, index)
118+
119+
index = FindCorrespondingCloseTokenIgnoreCase("abcFOOdefBARghBARi", 0, "foo", "bar")
120+
test.AssertEqual(t, 14, index)
121+
122+
index = FindCorrespondingCloseTokenIgnoreCase("abcfoodefbarghbari", 0, "FOO", "bar")
123+
test.AssertEqual(t, 14, index)
124+
125+
index = FindCorrespondingCloseTokenIgnoreCase("abcfoodefbarghbari", 0, "foo", "BAR")
126+
test.AssertEqual(t, 14, index)
127+
128+
index = FindCorrespondingCloseTokenIgnoreCase("abcfoodefbarghbari", 0, "FOO", "BAR")
129+
test.AssertEqual(t, 14, index)
130+
}

0 commit comments

Comments
 (0)