Skip to content

Commit 468296b

Browse files
committed
Add basic html sanitization
1 parent 062c13a commit 468296b

File tree

4 files changed

+88
-23
lines changed

4 files changed

+88
-23
lines changed

pkg/github/issues.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,15 +212,15 @@ func fragmentToIssue(fragment IssueFragment) *github.Issue {
212212

213213
return &github.Issue{
214214
Number: github.Ptr(int(fragment.Number)),
215-
Title: github.Ptr(sanitize.FilterInvisibleCharacters(string(fragment.Title))),
215+
Title: github.Ptr(sanitize.Sanitize(string(fragment.Title))),
216216
CreatedAt: &github.Timestamp{Time: fragment.CreatedAt.Time},
217217
UpdatedAt: &github.Timestamp{Time: fragment.UpdatedAt.Time},
218218
User: &github.User{
219219
Login: github.Ptr(string(fragment.Author.Login)),
220220
},
221221
State: github.Ptr(string(fragment.State)),
222222
ID: github.Ptr(fragment.DatabaseID),
223-
Body: github.Ptr(sanitize.FilterInvisibleCharacters(string(fragment.Body))),
223+
Body: github.Ptr(sanitize.Sanitize(string(fragment.Body))),
224224
Labels: foundLabels,
225225
Comments: github.Ptr(int(fragment.Comments.TotalCount)),
226226
}
@@ -327,10 +327,10 @@ func GetIssue(ctx context.Context, client *github.Client, owner string, repo str
327327
// Sanitize title/body on response
328328
if issue != nil {
329329
if issue.Title != nil {
330-
issue.Title = github.Ptr(sanitize.FilterInvisibleCharacters(*issue.Title))
330+
issue.Title = github.Ptr(sanitize.Sanitize(*issue.Title))
331331
}
332332
if issue.Body != nil {
333-
issue.Body = github.Ptr(sanitize.FilterInvisibleCharacters(*issue.Body))
333+
issue.Body = github.Ptr(sanitize.Sanitize(*issue.Body))
334334
}
335335
}
336336

pkg/github/pullrequests.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,10 @@ func GetPullRequest(ctx context.Context, client *github.Client, owner, repo stri
127127
// sanitize title/body on response
128128
if pr != nil {
129129
if pr.Title != nil {
130-
pr.Title = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Title))
130+
pr.Title = github.Ptr(sanitize.Sanitize(*pr.Title))
131131
}
132132
if pr.Body != nil {
133-
pr.Body = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Body))
133+
pr.Body = github.Ptr(sanitize.Sanitize(*pr.Body))
134134
}
135135
}
136136

@@ -821,10 +821,10 @@ func ListPullRequests(getClient GetClientFn, t translations.TranslationHelperFun
821821
continue
822822
}
823823
if pr.Title != nil {
824-
pr.Title = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Title))
824+
pr.Title = github.Ptr(sanitize.Sanitize(*pr.Title))
825825
}
826826
if pr.Body != nil {
827-
pr.Body = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Body))
827+
pr.Body = github.Ptr(sanitize.Sanitize(*pr.Body))
828828
}
829829
}
830830

pkg/sanitize/sanitize.go

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,10 @@ import (
44
"github.com/microcosm-cc/bluemonday"
55
)
66

7-
type ContentFilter struct {
8-
HTMLPolicy *bluemonday.Policy
9-
}
10-
11-
func NewContentFilter() *ContentFilter {
12-
p := bluemonday.NewPolicy()
13-
p.AllowElements("b", "blockquote", "br", "code", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "li", "ol", "p", "pre", "strong", "sub", "sup", "table", "tbody", "td", "th", "thead", "tr", "ul")
14-
p.AllowAttrs("img", "a")
15-
p.AllowAttrs()
16-
p.AllowURLSchemes("https")
7+
var policy *bluemonday.Policy
178

18-
return &ContentFilter{
19-
HTMLPolicy: p,
20-
}
9+
func Sanitize(input string) string {
10+
return FilterHTMLTags(FilterInvisibleCharacters(input))
2111
}
2212

2313
// FilterInvisibleCharacters removes invisible or control characters that should not appear
@@ -40,11 +30,25 @@ func FilterInvisibleCharacters(input string) string {
4030
return string(out)
4131
}
4232

43-
func (cf *ContentFilter) FilterHtmlTags(input string) string {
33+
func FilterHTMLTags(input string) string {
34+
if policy == nil {
35+
policyInit()
36+
}
4437
if input == "" {
4538
return input
4639
}
47-
return cf.HTMLPolicy.Sanitize(input)
40+
return policy.Sanitize(input)
41+
}
42+
43+
func policyInit() {
44+
if policy != nil {
45+
return
46+
}
47+
policy = bluemonday.StrictPolicy()
48+
policy.AllowElements("b", "blockquote", "br", "code", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "li", "ol", "p", "pre", "strong", "sub", "sup", "table", "tbody", "td", "th", "thead", "tr", "ul")
49+
policy.AllowAttrs("img", "a")
50+
policy.AllowURLSchemes("https")
51+
policy.AllowImages()
4852
}
4953

5054
func shouldRemoveRune(r rune) bool {

pkg/sanitize/sanitize_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,3 +186,64 @@ func TestShouldRemoveRune(t *testing.T) {
186186
})
187187
}
188188
}
189+
190+
func TestFilterHtmlTags(t *testing.T) {
191+
tests := []struct {
192+
name string
193+
input string
194+
expected string
195+
}{
196+
{
197+
name: "empty string",
198+
input: "",
199+
expected: "",
200+
},
201+
{
202+
name: "allowed simple tags preserved",
203+
input: "<b>bold</b>",
204+
expected: "<b>bold</b>",
205+
},
206+
{
207+
name: "multiple allowed tags",
208+
input: "<b>bold</b> and <em>italic</em>",
209+
expected: "<b>bold</b> and <em>italic</em>",
210+
},
211+
{
212+
name: "code tag preserved",
213+
input: "<code>fmt.Println(\"hi\")</code>",
214+
expected: "<code>fmt.Println(&#34;hi&#34;)</code>", // quotes are escaped by sanitizer
215+
},
216+
{
217+
name: "disallowed script removed entirely",
218+
input: "<script>alert(1)</script>",
219+
expected: "", // StrictPolicy should drop script element and contents
220+
},
221+
{
222+
name: "anchor removed but inner text kept",
223+
input: "Click <a href='https://example.com'>here</a> now",
224+
expected: "Click here now",
225+
},
226+
{
227+
name: "image preserved (allowed via AllowImages)",
228+
input: "<img src='x' alt='y'>",
229+
expected: "<img src=\"x\" alt=\"y\">", // images are allowed via AllowImages()
230+
},
231+
{
232+
name: "mixed allowed and disallowed",
233+
input: "<b>bold</b> <script>alert(1)</script> <em>italic</em>",
234+
expected: "<b>bold</b> <em>italic</em>",
235+
},
236+
{
237+
name: "idempotent sanitization",
238+
input: FilterHTMLTags("<b>bold</b> and <em>italic</em>"),
239+
expected: "<b>bold</b> and <em>italic</em>",
240+
},
241+
}
242+
243+
for _, tt := range tests {
244+
t.Run(tt.name, func(t *testing.T) {
245+
result := FilterHTMLTags(tt.input)
246+
assert.Equal(t, tt.expected, result)
247+
})
248+
}
249+
}

0 commit comments

Comments
 (0)