Skip to content

Commit ff69df4

Browse files
committed
Reset evaluators immediately after use, not only on reuse
That frees up the cache directly in the typical `el.select("parent child")` flow. The streaming use of evaluators still only reset on reuse, because the control flow of the stream returns to the caller before complete execution. But we also now hold the cache as a WeakHashMap, so it can be GCed when the DOM goes unreachable. Also changed to the reset to call `remove` so that the ThreadLocalMap entry is removed (vs just clearing the held map). Fixes #2411
1 parent e17d2a8 commit ff69df4

File tree

5 files changed

+15
-15
lines changed

5 files changed

+15
-15
lines changed

CHANGES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
* A ValidationException could be thrown in the adoption agency algorithm with particularly broken input. Now logged as a parse error. [#2393](https://github.com/jhy/jsoup/issues/2393)
2525
* Null characters in the HTML body were not consistently removed; and in foreign content were not correctly replaced. [#2395](https://github.com/jhy/jsoup/issues/2395)
2626
* An IndexOutOfBoundsException could be thrown when parsing a body fragment with crafted input. Now logged as a parse error. [#2397](https://github.com/jhy/jsoup/issues/2397), [#2406](https://github.com/jhy/jsoup/issues/2406)
27-
27+
* When using StructuralEvaluators (e.g., a `parent child` selector) across many retained threads, their memoized results could also be retained, increasing memory use. These results are now cleared immediately after use, reducing overall memory consumption. [#2411](https://github.com/jhy/jsoup/issues/2411)
2828

2929
## 1.21.2 (2025-Aug-25)
3030

src/main/java/org/jsoup/select/Collector.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ public static Elements collect(Evaluator eval, Element root) {
3131
Stream<Element> stream = eval.wantsNodes() ?
3232
streamNodes(eval, root, Element.class) :
3333
stream(eval, root);
34-
35-
return stream.collect(toCollection(Elements::new));
34+
Elements els = stream.collect(toCollection(Elements::new));
35+
eval.reset(); // drops any held memos
36+
return els;
3637
}
3738

3839
/**

src/main/java/org/jsoup/select/StructuralEvaluator.java

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.jsoup.select;
22

3-
import org.jsoup.internal.Functions;
43
import org.jsoup.internal.SoftPool;
54
import org.jsoup.internal.StringUtil;
65
import org.jsoup.nodes.Element;
@@ -10,8 +9,8 @@
109
import org.jsoup.nodes.TextNode;
1110

1211
import java.util.ArrayList;
13-
import java.util.IdentityHashMap;
1412
import java.util.Map;
13+
import java.util.WeakHashMap;
1514

1615
/**
1716
* Base structural evaluator.
@@ -32,17 +31,16 @@ boolean wantsNodes() {
3231

3332
// Memoize inner matches, to save repeated re-evaluations of parent, sibling etc.
3433
// root + element: Boolean matches. ThreadLocal in case the Evaluator is compiled then reused across multi threads
35-
final ThreadLocal<IdentityHashMap<Node, IdentityHashMap<Node, Boolean>>>
36-
threadMemo = ThreadLocal.withInitial(IdentityHashMap::new);
34+
final ThreadLocal<Map<Node, Map<Node, Boolean>>> threadMemo = ThreadLocal.withInitial(WeakHashMap::new);
3735

3836
boolean memoMatches(final Element root, final Node node) {
39-
Map<Node, IdentityHashMap<Node, Boolean>> rootMemo = threadMemo.get();
40-
Map<Node, Boolean> memo = rootMemo.computeIfAbsent(root, Functions.identityMapFunction());
41-
return memo.computeIfAbsent(node, key -> evaluator.matches(root, key));
37+
Map<Node, Map<Node, Boolean>> rootMemo = threadMemo.get();
38+
Map<Node, Boolean> memo = rootMemo.computeIfAbsent(root, r -> new WeakHashMap<>());
39+
return memo.computeIfAbsent(node, test -> evaluator.matches(root, test));
4240
}
4341

4442
@Override protected void reset() {
45-
threadMemo.get().clear();
43+
threadMemo.remove();
4644
evaluator.reset();
4745
super.reset();
4846
}

src/test/java/org/jsoup/select/SelectorTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
import org.jsoup.parser.Parser;
1212
import org.junit.jupiter.api.Test;
1313

14-
import java.util.IdentityHashMap;
1514
import java.util.List;
1615
import java.util.Locale;
16+
import java.util.Map;
1717
import java.util.stream.Collectors;
1818

1919
import static org.jsoup.select.EvaluatorDebug.sexpr;
@@ -1192,7 +1192,7 @@ public void wildcardNamespaceMatchesNoNamespace() {
11921192
Evaluator eval = QueryParser.parse("p ~ p");
11931193
CombiningEvaluator.And andEval = (CombiningEvaluator.And) eval;
11941194
StructuralEvaluator.PreviousSibling prevEval = (StructuralEvaluator.PreviousSibling) andEval.evaluators.get(0);
1195-
IdentityHashMap<Node, IdentityHashMap<Node, Boolean>> map = prevEval.threadMemo.get();
1195+
Map<Node, Map<Node, Boolean>> map = prevEval.threadMemo.get();
11961196
assertEquals(0, map.size()); // no memo yet
11971197

11981198
Document doc1 = Jsoup.parse("<p>One<p>Two<p>Three");
@@ -1205,7 +1205,7 @@ public void wildcardNamespaceMatchesNoNamespace() {
12051205
assertEquals(2, s2.size());
12061206
assertEquals("Two2", s2.first().text());
12071207

1208-
assertEquals(1, map.size()); // root of doc 2
1208+
assertEquals(0, map.size()); // reset after collect
12091209
}
12101210

12111211
@Test public void blankTextNodesAreConsideredEmpty() {

src/test/java/org/jsoup/select/StructuralEvaluatorTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ void selectorMemoIsClearedOnReset(String selector, boolean expectMemos) {
4242
List<StructuralEvaluator> structuralEvals = new ArrayList<>();
4343
collectEvals(evaluator, structuralEvals);
4444

45-
Selector.select(evaluator, doc); // populate memos
45+
// use Collector.stream vs Selector.select(), as the later is able to reset after executing
46+
Collector.stream(evaluator, doc).count(); // consume stream to populate memos
4647
assertFalse(structuralEvals.isEmpty());
4748

4849
boolean hadMemos = false;

0 commit comments

Comments
 (0)