Skip to content

Commit f478b0f

Browse files
authored
Merge pull request #4 from JuliaComputing/sp/gumbo0.8
gumbo 0.8 compat
2 parents 7a8707c + 8982223 commit f478b0f

File tree

5 files changed: 43 additions and 27 deletions.

.travis.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ os:
44
- linux
55
- osx
66
julia:
7-
- 1.0
87
- 1.3
98
- nightly
109
matrix:

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Gumbo = "708ec375-b3d6-5a57-a7ce-8257bf98657a"
88

99
[compat]
1010
julia = "1"
11-
Gumbo = "0.7"
11+
Gumbo = "0.8"
1212

1313
[extras]
1414
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

src/HTMLSanitizer.jl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,13 @@ end
127127
# 2. starts with `//` (protocol-relative).
128128
# 3. starts with `../`/`./` (relative directory traversal)
129129
# 4. doesn't start with either of the above and doesn't start with a protocol (e.g. `foo/bar.html`)
130-
is_relative_url(url) = occursin(r"\.?\.?//?"i, url) || !occursin(r"^\w+://"i, url)
130+
function is_relative_url(url)
131+
if occursin(r"^\.?\.?//?"i, url)
132+
return true
133+
else
134+
return !occursin(r"^\w+://"i, url)
135+
end
136+
end
131137

132138
"""
133139
Default whitelist. Allows many elements and attributes, but crucially removes `<script>` elements

test/malicious_html.jl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,42 +23,42 @@
2323

2424
@testset "<img>" begin
2525
@testset "should not be possible to inject JS via an unquoted <img> src attribute" begin
26-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src=javascript:alert('XSS')>""")
26+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src=javascript:alert('XSS')>""")
2727
end
2828

2929
@testset "should not be possible to inject JS using grave accents as <img> src delimiters" begin
30-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src=`javascript:alert('XSS')`>""")
30+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src=`javascript:alert('XSS')`>""")
3131
end
3232

3333
@testset "should not be possible to inject <script> via a malformed <img> tag" begin
34-
@test """<img></img>">""" == HTMLSanitizer.sanitize("""<img \"\"\"><script>alert("XSS")</script>">""")
34+
@test """<img/>\"&gt;""" == HTMLSanitizer.sanitize("""<img \"\"\"><script>alert("XSS")</script>">""")
3535
end
3636

3737
@testset "should not be possible to inject protocol-based JS" begin
38-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>""")
38+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>""")
3939

40-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>""")
40+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>""")
4141

42-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>""")
42+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>""")
4343

4444
# Encoded tab character.
45-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src="jav&#x09;ascript:alert('XSS');">""")
45+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src="jav&#x09;ascript:alert('XSS');">""")
4646

4747
# Encoded newline.
48-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src="jav&#x0A;ascript:alert('XSS');">""")
48+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src="jav&#x0A;ascript:alert('XSS');">""")
4949

5050
# Encoded carriage return.
51-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src="jav&#x0D;ascript:alert('XSS');">""")
51+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src="jav&#x0D;ascript:alert('XSS');">""")
5252

5353
# Spaces plus meta char.
54-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src=" &#14; javascript:alert('XSS');">""")
54+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src=" &#14; javascript:alert('XSS');">""")
5555

5656
# Mixed spaces and tabs.
57-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src="j\na v\tascript://alert('XSS');">""")
57+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src="j\na v\tascript://alert('XSS');">""")
5858
end
5959

6060
@testset "should not be possible to inject protocol-based JS via whitespace" begin
61-
@test "<img></img>" == HTMLSanitizer.sanitize("""<img src="jav\tascript:alert('XSS');">""")
61+
@test "<img/>" == HTMLSanitizer.sanitize("""<img src="jav\tascript:alert('XSS');">""")
6262
end
6363

6464
@testset "should not be possible to inject JS using a half-open <img> tag" begin

test/runtests.jl

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,15 @@ using Test
4747
end
4848

4949
@testset "test_whitelisted_longdesc_schemes_are_allowed" begin
50-
stuff = """<img longdesc="http://longdesc.com" src="./foo.jpg"></img>"""
50+
stuff = """<img longdesc="http://longdesc.com" src="./foo.jpg"/>"""
5151
html = HTMLSanitizer.sanitize(stuff)
5252
@test stuff == html
5353
end
5454

5555
@testset "test_weird_longdesc_schemes_are_removed" begin
56-
stuff = """<img src="./foo.jpg" longdesc="javascript:alert(1)"></img>"""
56+
stuff = """<img src="./foo.jpg" longdesc="javascript:alert(1)"/>"""
5757
html = HTMLSanitizer.sanitize(stuff)
58-
@test """<img src="./foo.jpg"></img>""" == html
58+
@test """<img src="./foo.jpg"/>""" == html
5959
end
6060

6161
@testset "test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes" begin
@@ -153,30 +153,30 @@ end
153153

154154
@testset "urls" begin
155155
@testset "relative" begin
156-
orig = """<img src="foo/bar.html"></img>"""
156+
orig = """<img src="foo/bar.html"/>"""
157157
@test sanitize(orig) == orig
158158

159-
orig = """<img src="/foo/bar.html"></img>"""
159+
orig = """<img src="/foo/bar.html"/>"""
160160
@test sanitize(orig) == orig
161161

162-
orig = """<img src="//foo/bar.html"></img>"""
162+
orig = """<img src="//foo/bar.html"/>"""
163163
@test sanitize(orig) == orig
164164

165-
orig = """<img src="./foo/bar.html"></img>"""
165+
orig = """<img src="./foo/bar.html"/>"""
166166
@test sanitize(orig) == orig
167167

168-
orig = """<img src="/asd://foo/bar.html"></img>"""
168+
orig = """<img src="/asd://foo/bar.html"/>"""
169169
@test sanitize(orig) == orig
170170
end
171171

172172
@testset "protocols" begin
173-
orig = """<img src="asd://foo/bar.html"></img>"""
174-
@test sanitize(orig) == "<img></img>"
173+
orig = """<img src="asd://foo/bar.html"/>"""
174+
@test sanitize(orig) == "<img/>"
175175

176-
orig = """<img src="http://foo/bar.html"></img>"""
176+
orig = """<img src="http://foo/bar.html"/>"""
177177
@test sanitize(orig) == orig
178178

179-
orig = """<img src="https://foo/bar.html"></img>"""
179+
orig = """<img src="https://foo/bar.html"/>"""
180180
@test sanitize(orig) == orig
181181
end
182182
end
@@ -186,4 +186,15 @@ end
186186
sanitize(html) == read(joinpath(@__DIR__, "testhtml_out.html"), String)
187187
end
188188

189+
@testset "relative urls" begin
190+
@test HTMLSanitizer.is_relative_url("/foo")
191+
@test HTMLSanitizer.is_relative_url("//foo")
192+
@test HTMLSanitizer.is_relative_url("./foo")
193+
@test HTMLSanitizer.is_relative_url("../foo")
194+
@test HTMLSanitizer.is_relative_url("foo")
195+
@test !HTMLSanitizer.is_relative_url("https://foo")
196+
@test !HTMLSanitizer.is_relative_url("http://foo")
197+
@test !HTMLSanitizer.is_relative_url("bar://foo")
198+
end
199+
189200
include("malicious_html.jl")

0 commit comments

Comments
 (0)