Skip to content

Commit 1de9cf4

Browse files
jvoisin authored and fguillot committed
perf(readability): simplify removeUnlikelyCandidates
- Use an iterator instead of generating a whole slice when iterating over the selection.
- Using an iterator allows a for-loop construct instead of a lambda, which is a bit clearer.
- Do the filtering in Find()'s selector instead of in the loop. This doesn't matter much now that we're using an iterator, but it makes the code a bit more obvious/simpler, and likely slightly reduces the number of iterations.
1 parent 7912b9b commit 1de9cf4

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

internal/reader/readability/readability.go

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -208,22 +208,26 @@ func shouldRemoveCandidate(str string) bool {
208208
}
209209

210210
func removeUnlikelyCandidates(document *goquery.Document) {
211-
document.Find("*").Each(func(i int, s *goquery.Selection) {
212-
if s.Length() == 0 || s.Get(0).Data == "html" || s.Get(0).Data == "body" {
213-
return
211+
// Only select tags with either a class or an id attribute,
212+
// and never the html nor body tags, as we don't want to ever remove them.
213+
selector := "[class]:not(body,html)" + "," + "[id]:not(body,html)"
214+
215+
for _, s := range document.Find(selector).EachIter() {
216+
if s.Length() == 0 {
217+
continue
214218
}
215219

216220
// Don't remove elements within code blocks (pre or code tags)
217-
if s.Closest("pre, code").Length() > 0 {
218-
return
221+
if s.Closest("pre,code").Length() > 0 {
222+
continue
219223
}
220224

221225
if class, ok := s.Attr("class"); ok && shouldRemoveCandidate(class) {
222226
s.Remove()
223227
} else if id, ok := s.Attr("id"); ok && shouldRemoveCandidate(id) {
224228
s.Remove()
225229
}
226-
})
230+
}
227231
}
228232

229233
func getTopCandidate(document *goquery.Document, candidates candidateList) *candidate {

0 commit comments

Comments
 (0)