Skip to content

Commit 76b391e

Browse files
committed
Cascade cache resets in StructuralEvaluator
Fixes #2277
1 parent cee7563 commit 76b391e

File tree

4 files changed

+118
-0
lines changed

4 files changed

+118
-0
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@
7070
positive step, and so would not match as expected. [1147](https://github.com/jhy/jsoup/issues/1147)
7171
* Calling `doc.charset(charset)` on an empty XML document would throw an
7272
`IndexOutOfBoundsException`. [2266](https://github.com/jhy/jsoup/issues/2266)
73+
* Fixed a memory leak when reusing a nested `StructuralEvaluator` (e.g., a selector ancestor chain like `A B C`) by
74+
ensuring cache reset calls cascade to inner members. [2277](https://github.com/jhy/jsoup/issues/2277)
7375

7476
## 1.18.3 (2024-Dec-02)
7577

src/main/java/org/jsoup/select/StructuralEvaluator.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ boolean memoMatches(final Element root, final Element element) {
3333

3434
@Override protected void reset() {
3535
threadMemo.get().clear();
36+
evaluator.reset();
3637
super.reset();
3738
}
3839

@@ -221,6 +222,14 @@ public boolean matches(Element root, Element element) {
221222
return cost;
222223
}
223224

225+
@Override
226+
protected void reset() {
227+
for (Evaluator evaluator : evaluators) {
228+
evaluator.reset();
229+
}
230+
super.reset();
231+
}
232+
224233
@Override
225234
public String toString() {
226235
return StringUtil.join(evaluators, " > ");

src/test/java/org/jsoup/select/SelectorIT.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22

33
import org.jsoup.Jsoup;
44
import org.jsoup.nodes.Document;
5+
import org.jsoup.parser.Parser;
6+
import org.jsoup.parser.StreamParser;
57
import org.junit.jupiter.api.Test;
68

79
import java.util.concurrent.atomic.AtomicInteger;
810

911
import static org.junit.jupiter.api.Assertions.assertEquals;
12+
import static org.junit.jupiter.api.Assertions.fail;
1013

1114
public class SelectorIT {
1215

@@ -55,4 +58,39 @@ public void uncaughtException(Thread t, Throwable e) {
5558
}
5659
}
5760

61+
@Test public void streamParserSelect() throws Exception {
62+
// https://github.com/jhy/jsoup/issues/2277
63+
// The memo in the StructuralEvaluator was not getting reset correctly, and so would run out of memory
64+
// Test tracks memory consumption. Will be interesting to see how it behaves on the CI workers.
65+
66+
String xml = "<A><B><C>1";
67+
Evaluator query = QueryParser.parse("A B C");
68+
Runtime runtime = Runtime.getRuntime();
69+
70+
System.gc();
71+
Thread.sleep(100);
72+
long initialUsed = runtime.totalMemory() - runtime.freeMemory();
73+
74+
for (int i = 0; i < 50_000; i++) { // Before fix, would exceed 10MB in ~ 9000 iters
75+
try (StreamParser parser = new StreamParser(Parser.xmlParser())) {
76+
parser.parse(xml, "");
77+
parser.selectFirst(query);
78+
parser.stop();
79+
}
80+
81+
if (i % 1000 == 0) {
82+
System.gc();
83+
Thread.sleep(100);
84+
long currentUsed = runtime.totalMemory() - runtime.freeMemory();
85+
long delta = currentUsed - initialUsed;
86+
87+
// Fail if we grow + 10MB
88+
if (delta > 10_000_000) {
89+
fail(String.format("Memo leak detected. Memory increased by %,d bytes after %,d iterations",
90+
delta, i));
91+
}
92+
}
93+
}
94+
}
95+
5896
}

src/test/java/org/jsoup/select/SelectorTest.java

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,4 +1381,73 @@ public void emptyPseudo() {
13811381
Elements neg2 = doc.select("p:nth-child(-1n+2)");
13821382
assertSelectedOwnText(neg2, "1", "2");
13831383
}
1384+
1385+
// Tests that nested structural and combining evaluators get reset
1386+
private static class ResetTracker extends Evaluator {
1387+
boolean resetCalled = false;
1388+
@Override
1389+
public boolean matches(Element root, Element element) {
1390+
return true;
1391+
}
1392+
1393+
@Override
1394+
protected void reset() {
1395+
resetCalled = true;
1396+
super.reset();
1397+
}
1398+
}
1399+
1400+
@Test void notResetCascades() {
1401+
ResetTracker track = new ResetTracker();
1402+
StructuralEvaluator.Not structEval = new StructuralEvaluator.Not(track);
1403+
1404+
Document doc = Jsoup.parse("<div><p>Test</p></div>");
1405+
Element p = doc.expectFirst("p");
1406+
structEval.matches(doc, p);
1407+
1408+
assertFalse(structEval.threadMemo.get().isEmpty());
1409+
assertFalse(track.resetCalled);
1410+
1411+
structEval.reset();
1412+
assertTrue(structEval.threadMemo.get().isEmpty());
1413+
assertTrue(track.resetCalled);
1414+
}
1415+
1416+
@Test void testImmediateParentRunCascades() {
1417+
ResetTracker child = new ResetTracker();
1418+
ResetTracker parent = new ResetTracker();
1419+
1420+
StructuralEvaluator.ImmediateParentRun run = new StructuralEvaluator.ImmediateParentRun(child);
1421+
run.add(parent);
1422+
1423+
Document doc = Jsoup.parse("<div><p><span>Test</span></p></div>");
1424+
Element span = doc.expectFirst("span");
1425+
assertTrue(run.matches(doc, span));
1426+
1427+
run.reset();
1428+
assertTrue(child.resetCalled);
1429+
assertTrue(parent.resetCalled);
1430+
}
1431+
1432+
@Test
1433+
public void testAncestorChain() {
1434+
ResetTracker grandParent = new ResetTracker();
1435+
ResetTracker parent = new ResetTracker();
1436+
ResetTracker child = new ResetTracker();
1437+
1438+
StructuralEvaluator.Ancestor b_needs_a = new StructuralEvaluator.Ancestor(grandParent);
1439+
StructuralEvaluator.Ancestor c_needs_b = new StructuralEvaluator.Ancestor(parent);
1440+
CombiningEvaluator.And chain = new CombiningEvaluator.And(child, c_needs_b, b_needs_a);
1441+
1442+
Document doc = Jsoup.parse("<div class='A'><p class='B'><span class='C'>Test</span></p></div>");
1443+
Element span = doc.expectFirst("span");
1444+
assertTrue(chain.matches(doc, span), "Should match span in correct ancestor chain");
1445+
1446+
chain.reset();
1447+
assertTrue(grandParent.resetCalled);
1448+
assertTrue(parent.resetCalled);
1449+
assertTrue(child.resetCalled);
1450+
assertTrue(b_needs_a.threadMemo.get().isEmpty());
1451+
assertTrue(c_needs_b.threadMemo.get().isEmpty());
1452+
}
13841453
}

0 commit comments

Comments
 (0)