Skip to content

Commit c80e6e4

Browse files
committed
Analyze FilterHTMLTags function for improvements
1 parent 058c0f3 commit c80e6e4

File tree

2 files changed

+112
-12
lines changed

2 files changed

+112
-12
lines changed

pkg/sanitize/sanitize.go

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
package sanitize
22

33
import (
4-
"github.com/microcosm-cc/bluemonday"
4+
"sync"
5+
6+
"github.com/microcosm-cc/bluemonday"
57
)
68

79
var policy *bluemonday.Policy
10+
var policyOnce sync.Once
811

912
func Sanitize(input string) string {
1013
return FilterHTMLTags(FilterInvisibleCharacters(input))
@@ -31,24 +34,44 @@ func FilterInvisibleCharacters(input string) string {
3134
}
3235

3336
func FilterHTMLTags(input string) string {
34-
if policy == nil {
35-
policyInit()
36-
}
37+
if policy == nil {
38+
policyInit()
39+
}
3740
if input == "" {
3841
return input
3942
}
4043
return policy.Sanitize(input)
4144
}
4245

4346
func policyInit() {
44-
if policy != nil {
45-
return
46-
}
47-
policy = bluemonday.StrictPolicy()
48-
policy.AllowElements("b", "blockquote", "br", "code", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "li", "ol", "p", "pre", "strong", "sub", "sup", "table", "tbody", "td", "th", "thead", "tr", "ul")
49-
policy.AllowAttrs("img", "a")
50-
policy.AllowURLSchemes("https")
51-
policy.AllowImages()
47+
policyOnce.Do(func() {
48+
p := bluemonday.StrictPolicy()
49+
50+
// Safe textual/formatting elements only
51+
p.AllowElements(
52+
"b", "blockquote", "br", "code", "em",
53+
"h1", "h2", "h3", "h4", "h5", "h6",
54+
"hr", "i", "li", "ol", "p", "pre",
55+
"strong", "sub", "sup", "table", "tbody",
56+
"td", "th", "thead", "tr", "ul",
57+
// Explicitly allow anchors and images with constrained attributes
58+
"a", "img",
59+
)
60+
61+
// Links: only allow https href and require parseable URLs
62+
p.AllowAttrs("href").OnElements("a")
63+
p.AllowURLSchemes("https")
64+
p.RequireParseableURLs(true)
65+
p.RequireNoFollowOnLinks()
66+
p.RequireNoReferrerOnLinks()
67+
p.AddTargetBlankToFullyQualifiedLinks()
68+
69+
// Images: allow only https src and basic descriptive attributes
70+
p.AllowImages()
71+
p.AllowAttrs("src", "alt", "title").OnElements("img")
72+
73+
policy = p
74+
})
5275
}
5376

5477
func shouldRemoveRune(r rune) bool {

pkg/sanitize/sanitize_test.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,82 @@
11
package sanitize
22

3+
import "testing"
4+
5+
func TestFilterHTMLTags(t *testing.T) {
6+
tests := []struct {
7+
name string
8+
in string
9+
expected string
10+
}{
11+
{
12+
name: "anchor https allowed",
13+
in: `<a href="https://good.com">ok</a>`,
14+
expected: `<a href="https://good.com">ok</a>`,
15+
},
16+
{
17+
name: "anchor javascript stripped href",
18+
in: `<a href="javascript:alert(1)">bad</a>`,
19+
expected: `<a>bad</a>`,
20+
},
21+
{
22+
name: "anchor protocol-relative href removed",
23+
in: `<a href="//example.com">text</a>`,
24+
expected: `<a>text</a>`,
25+
},
26+
{
27+
name: "anchor relative href removed",
28+
in: `<a href="/path">rel</a>`,
29+
expected: `<a>rel</a>`,
30+
},
31+
{
32+
name: "image https allowed",
33+
in: `<img src="https://img.example/x.png" alt="x" title="y">`,
34+
expected: `<img src="https://img.example/x.png" alt="x" title="y">`,
35+
},
36+
{
37+
name: "image data uri src removed",
38+
in: `<img src="data:image/svg+xml,xxx" alt="x">`,
39+
expected: `<img alt="x">`,
40+
},
41+
{
42+
name: "image srcset removed",
43+
in: `<img src="https://a/x.png" srcset="https://a 1x, https://b 2x" alt="x">`,
44+
expected: `<img src="https://a/x.png" alt="x">`,
45+
},
46+
{
47+
name: "onclick removed",
48+
in: `<p onclick="alert(1)">text</p>`,
49+
expected: `<p>text</p>`,
50+
},
51+
{
52+
name: "script removed",
53+
in: `<script>alert(1)</script>ok`,
54+
expected: `ok`,
55+
},
56+
{
57+
name: "iframe removed",
58+
in: `<iframe src="https://example.com"></iframe>ok`,
59+
expected: `ok`,
60+
},
61+
{
62+
name: "style attribute removed",
63+
in: `<p style="color:red">x</p>`,
64+
expected: `<p>x</p>`,
65+
},
66+
}
67+
68+
for _, tt := range tests {
69+
t.Run(tt.name, func(t *testing.T) {
70+
got := FilterHTMLTags(tt.in)
71+
if got != tt.expected {
72+
t.Fatalf("unexpected sanitize result.\ninput: %q\nexpected: %q\nactual: %q", tt.in, tt.expected, got)
73+
}
74+
})
75+
}
76+
}
77+
78+
package sanitize
79+
380
import (
481
"testing"
582

0 commit comments

Comments
 (0)