Skip to content

Commit 30bfd0b

Browse files
committed
refactor(sanitizer): use a smarter approach wrt. SanitizeHTML
The issue with slices.Contains is that the tag stack it searches keeps growing and never shrinks. An RSS entry could start with a bunch of random tags, and those would always be considered valid in the `case html.EndTagToken` case. While we can't use a strict push/pop stack, as we might have things like <video><source></video> that are valid HTML, we can look in the slice for the last index of the tag that is being closed, and pop it along with everything after it.
1 parent c171da1 commit 30bfd0b

File tree

1 file changed

+17
-2
lines changed

1 file changed

+17
-2
lines changed

internal/reader/sanitizer/sanitizer.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,16 @@ type SanitizerOptions struct {
202202
OpenLinksInNewTab bool
203203
}
204204

205+
// lastIndex returns the index of the last element of s that is equal to e,
// or -1 if no element matches (which includes a nil or empty s).
func lastIndex[T comparable](s []T, e T) int {
	// Scan from the end so the first hit is the right-most occurrence.
	for i := len(s) - 1; i >= 0; i-- {
		if s[i] == e {
			return i
		}
	}
	return -1
}
214+
205215
func SanitizeHTML(baseURL, rawHTML string, sanitizerOptions *SanitizerOptions) string {
206216
var tagStack []string
207217
var parentTag string
@@ -278,8 +288,13 @@ func SanitizeHTML(baseURL, rawHTML string, sanitizerOptions *SanitizerOptions) s
278288
}
279289
case html.EndTagToken:
280290
if len(blockedStack) == 0 {
281-
if isValidTag(tagName) && slices.Contains(tagStack, tagName) {
282-
buffer.WriteString("</" + tagName + ">")
291+
if isValidTag(tagName) {
292+
// We can't use a strict push/pop queue, as we might have things like <video><source></video>
293+
// that are valid HTML.
294+
if idx := lastIndex(tagStack, tagName); idx != -1 {
295+
tagStack = tagStack[:idx]
296+
buffer.WriteString("</" + tagName + ">")
297+
}
283298
}
284299
} else {
285300
if blockedStack[len(blockedStack)-1] == tagName {

0 commit comments

Comments
 (0)