-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Add html filtering #1356
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add html filtering #1356
Changes from 6 commits
062c13a
d2d09b7
ae62cc9
c01b5d0
058c0f3
5b4fcd6
9e8d209
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,18 @@ | ||
| package sanitize | ||
|
|
||
| import ( | ||
| "sync" | ||
|
|
||
| "github.com/microcosm-cc/bluemonday" | ||
| ) | ||
|
|
||
| var policy *bluemonday.Policy | ||
| var policyOnce sync.Once | ||
|
|
||
| func Sanitize(input string) string { | ||
| return FilterHTMLTags(FilterInvisibleCharacters(input)) | ||
| } | ||
|
|
||
| // FilterInvisibleCharacters removes invisible or control characters that should not appear | ||
| // in user-facing titles or bodies. This includes: | ||
| // - Unicode tag characters: U+E0001, U+E0020–U+E007F | ||
|
|
@@ -20,6 +33,43 @@ func FilterInvisibleCharacters(input string) string { | |
| return string(out) | ||
| } | ||
|
|
||
| func FilterHTMLTags(input string) string { | ||
| if policy == nil { | ||
| policyInit() | ||
| } | ||
| if input == "" { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we could also check if the string has any HTML in the first place in this early return?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Interesting idea, although an early return that has to parse the content might not be an optimisation. Hard to tell without getting into the weeds.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What I was mainly thinking about is just adding a simple
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bluemonday does html input tokenization and I don't want to reinvent the wheel here. :) |
||
| return input | ||
| } | ||
| return policy.Sanitize(input) | ||
| } | ||
|
|
||
| func policyInit() { | ||
| policyOnce.Do(func() { | ||
| p := bluemonday.StrictPolicy() | ||
|
|
||
| p.AllowElements( | ||
| "b", "blockquote", "br", "code", "em", | ||
| "h1", "h2", "h3", "h4", "h5", "h6", | ||
| "hr", "i", "li", "ol", "p", "pre", | ||
| "strong", "sub", "sup", "table", "tbody", | ||
| "td", "th", "thead", "tr", "ul", | ||
| "a", "img", | ||
| ) | ||
|
|
||
| p.AllowAttrs("href").OnElements("a") | ||
| p.AllowURLSchemes("https") | ||
JoannaaKL marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| p.RequireParseableURLs(true) | ||
| p.RequireNoFollowOnLinks(true) | ||
| p.RequireNoReferrerOnLinks(true) | ||
| p.AddTargetBlankToFullyQualifiedLinks(true) | ||
|
|
||
| p.AllowImages() | ||
| p.AllowAttrs("src", "alt", "title").OnElements("img") | ||
|
|
||
| policy = p | ||
| }) | ||
| } | ||
|
|
||
| func shouldRemoveRune(r rune) bool { | ||
| switch r { | ||
| case 0x200B, // ZERO WIDTH SPACE | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.