Skip to content

Commit 975bad3

Browse files
authored
Merge pull request #22 from anonyein/origin
Origin
2 parents 8ebe09c + eeae0ff commit 975bad3

File tree

12 files changed

+38
-26
lines changed

12 files changed

+38
-26
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
matrix:
1717
os: [ubuntu-latest, windows-latest, macOS-latest]
1818
# choosing to run a reduced set of LTS, current, and next, to balance coverage and execution time
19-
java: [8, 17, 21, 25]
19+
java: [8, 17, 25]
2020
fail-fast: false
2121
name: Test JDK ${{ matrix.java }}, ${{ matrix.os }}
2222
steps:

CHANGES.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
<version>1.8</version>
1212
</dependency>
1313
```
14-
(If you already have that dependency in your classpath, but you want to keep using the Java regex engine, you can disable re2j via `System.setProperty("jsoup.useRe2j", "false")`.) [#2407](https://github.com/jhy/jsoup/pull/2407)
14+
(If you already have that dependency in your classpath, but you want to keep using the Java regex engine, you can disable re2j via `System.setProperty("jsoup.useRe2j", "false")`.) You can confirm that the re2j engine has been enabled correctly by calling `Regex.usingRe2j()`. [#2407](https://github.com/jhy/jsoup/pull/2407)
1515

16-
* Added an instance method `Parser#unescape(String, boolean)` that unescapes HTML entities using the parsers configuration (e.g. to support error tracking), complementing the existing static utility `Parser.unescapeEntities(String, boolean)`. [#2396](https://github.com/jhy/jsoup/pull/2396)
16+
* Added an instance method `Parser#unescape(String, boolean)` that unescapes HTML entities using the parser's configuration (e.g. to support error tracking), complementing the existing static utility `Parser.unescapeEntities(String, boolean)`. [#2396](https://github.com/jhy/jsoup/pull/2396)
1717
* Build: added CI coverage for JDK 25 [#2403](https://github.com/jhy/jsoup/pull/2403)
1818
* Build: added a CI fuzzer for contextual fragment parsing (in addition to existing full body HTML and XML fuzzers). [oss-fuzz #14041](https://github.com/google/oss-fuzz/pull/14041)
1919

@@ -24,7 +24,10 @@
2424
* A ValidationException could be thrown in the adoption agency algorithm with particularly broken input. Now logged as a parse error. [#2393](https://github.com/jhy/jsoup/issues/2393)
2525
* Null characters in the HTML body were not consistently removed; and in foreign content were not correctly replaced. [#2395](https://github.com/jhy/jsoup/issues/2395)
2626
* An IndexOutOfBoundsException could be thrown when parsing a body fragment with crafted input. Now logged as a parse error. [#2397](https://github.com/jhy/jsoup/issues/2397), [#2406](https://github.com/jhy/jsoup/issues/2406)
27+
* When using StructuralEvaluators (e.g., a `parent child` selector) across many retained threads, their memoized results could also be retained, increasing memory use. These results are now cleared immediately after use, reducing overall memory consumption. [#2411](https://github.com/jhy/jsoup/issues/2411)
2728

29+
### Internal Changes
30+
* Deprecated internal helper `org.jsoup.internal.Functions` (for removal in v1.23.1). This was previously used to support older Android API levels without full `java.util.function` coverage; jsoup now requires core library desugaring so this indirection is no longer necessary. [#2412](https://github.com/jhy/jsoup/pull/2412)
2831

2932
## 1.21.2 (2025-Aug-25)
3033

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@
319319
<plugin>
320320
<groupId>org.sonatype.central</groupId>
321321
<artifactId>central-publishing-maven-plugin</artifactId>
322-
<version>0.8.0</version>
322+
<version>0.9.0</version>
323323
<extensions>true</extensions>
324324
<configuration>
325325
<publishingServerId>central</publishingServerId>

src/main/java/org/jsoup/helper/Regex.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public class Regex {
3333
@throws ValidationException if the regex is invalid
3434
*/
3535
public static Regex compile(String regex) {
36-
if (hasRe2j && wantsRe2j()) {
36+
if (usingRe2j()) {
3737
return Re2jRegex.compile(regex);
3838
}
3939

@@ -49,6 +49,14 @@ public static Regex fromPattern(Pattern pattern) {
4949
return new Regex(pattern);
5050
}
5151

52+
/**
53+
Checks if re2j is available (on classpath) and enabled (via system property).
54+
@return true if re2j is available and enabled
55+
*/
56+
public static boolean usingRe2j() {
57+
return hasRe2j && wantsRe2j();
58+
}
59+
5260
static boolean wantsRe2j() {
5361
return Boolean.parseBoolean(System.getProperty(SharedConstants.UseRe2j, "true"));
5462
}

src/main/java/org/jsoup/helper/UrlConnectionExecutor.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package org.jsoup.helper;
22

33
import org.jsoup.Connection;
4-
import org.jsoup.internal.Functions;
54
import org.jspecify.annotations.Nullable;
65

76
import javax.net.ssl.HttpsURLConnection;
@@ -123,7 +122,7 @@ private static LinkedHashMap<String, List<String>> createHeaderMap(HttpURLConnec
123122
if (key == null || val == null)
124123
continue; // skip http1.1 line
125124

126-
final List<String> vals = headers.computeIfAbsent(key, Functions.listFunction());
125+
final List<String> vals = headers.computeIfAbsent(key, k -> new java.util.ArrayList<>());
127126
vals.add(val);
128127
}
129128
return headers;

src/main/java/org/jsoup/internal/Functions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111

1212
/**
1313
* An internal class containing functions for use with {@link Map#computeIfAbsent(Object, Function)}.
14+
* @deprecated for removal in jsoup 1.23.1. Replace usages with direct constructor references / lambdas.
1415
*/
1516
@SuppressWarnings({"rawtypes", "unchecked"})
17+
@Deprecated
1618
public final class Functions {
1719
private static final Function ListFunction = key -> new ArrayList<>();
1820
private static final Function SetFunction = key -> new HashSet<>();

src/main/java/org/jsoup/safety/Safelist.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/
66
*/
77

88
import org.jsoup.helper.Validate;
9-
import org.jsoup.internal.Functions;
109
import org.jsoup.internal.Normalizer;
1110
import org.jsoup.nodes.Attribute;
1211
import org.jsoup.nodes.Attributes;
@@ -306,7 +305,7 @@ public Safelist addAttributes(String tag, String... attributes) {
306305
Validate.notEmpty(key);
307306
attributeSet.add(AttributeKey.valueOf(key));
308307
}
309-
Set<AttributeKey> currentSet = this.attributes.computeIfAbsent(tagName, Functions.setFunction());
308+
Set<AttributeKey> currentSet = this.attributes.computeIfAbsent(tagName, k -> new HashSet<>());
310309
currentSet.addAll(attributeSet);
311310
return this;
312311
}
@@ -380,7 +379,7 @@ public Safelist addEnforcedAttribute(String tag, String attribute, String value)
380379
AttributeKey attrKey = AttributeKey.valueOf(attribute);
381380
AttributeValue attrVal = AttributeValue.valueOf(value);
382381

383-
Map<AttributeKey, AttributeValue> attrMap = enforcedAttributes.computeIfAbsent(tagName, Functions.mapFunction());
382+
Map<AttributeKey, AttributeValue> attrMap = enforcedAttributes.computeIfAbsent(tagName, k -> new HashMap<>());
384383
attrMap.put(attrKey, attrVal);
385384
return this;
386385
}
@@ -453,8 +452,8 @@ public Safelist addProtocols(String tag, String attribute, String... protocols)
453452

454453
TagName tagName = TagName.valueOf(tag);
455454
AttributeKey attrKey = AttributeKey.valueOf(attribute);
456-
Map<AttributeKey, Set<Protocol>> attrMap = this.protocols.computeIfAbsent(tagName, Functions.mapFunction());
457-
Set<Protocol> protSet = attrMap.computeIfAbsent(attrKey, Functions.setFunction());
455+
Map<AttributeKey, Set<Protocol>> attrMap = this.protocols.computeIfAbsent(tagName, k -> new HashMap<>());
456+
Set<Protocol> protSet = attrMap.computeIfAbsent(attrKey, k -> new HashSet<>());
458457

459458
for (String protocol : protocols) {
460459
Validate.notEmpty(protocol);

src/main/java/org/jsoup/select/Collector.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ public static Elements collect(Evaluator eval, Element root) {
3131
Stream<Element> stream = eval.wantsNodes() ?
3232
streamNodes(eval, root, Element.class) :
3333
stream(eval, root);
34-
35-
return stream.collect(toCollection(Elements::new));
34+
Elements els = stream.collect(toCollection(Elements::new));
35+
eval.reset(); // drops any held memos
36+
return els;
3637
}
3738

3839
/**

src/main/java/org/jsoup/select/StructuralEvaluator.java

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.jsoup.select;
22

3-
import org.jsoup.internal.Functions;
43
import org.jsoup.internal.SoftPool;
54
import org.jsoup.internal.StringUtil;
65
import org.jsoup.nodes.Element;
@@ -10,8 +9,8 @@
109
import org.jsoup.nodes.TextNode;
1110

1211
import java.util.ArrayList;
13-
import java.util.IdentityHashMap;
1412
import java.util.Map;
13+
import java.util.WeakHashMap;
1514

1615
/**
1716
* Base structural evaluator.
@@ -32,17 +31,16 @@ boolean wantsNodes() {
3231

3332
// Memoize inner matches, to save repeated re-evaluations of parent, sibling etc.
3433
// root + element: Boolean matches. ThreadLocal in case the Evaluator is compiled then reused across multiple threads
35-
final ThreadLocal<IdentityHashMap<Node, IdentityHashMap<Node, Boolean>>>
36-
threadMemo = ThreadLocal.withInitial(IdentityHashMap::new);
34+
final ThreadLocal<Map<Node, Map<Node, Boolean>>> threadMemo = ThreadLocal.withInitial(WeakHashMap::new);
3735

3836
boolean memoMatches(final Element root, final Node node) {
39-
Map<Node, IdentityHashMap<Node, Boolean>> rootMemo = threadMemo.get();
40-
Map<Node, Boolean> memo = rootMemo.computeIfAbsent(root, Functions.identityMapFunction());
41-
return memo.computeIfAbsent(node, key -> evaluator.matches(root, key));
37+
Map<Node, Map<Node, Boolean>> rootMemo = threadMemo.get();
38+
Map<Node, Boolean> memo = rootMemo.computeIfAbsent(root, r -> new WeakHashMap<>());
39+
return memo.computeIfAbsent(node, test -> evaluator.matches(root, test));
4240
}
4341

4442
@Override protected void reset() {
45-
threadMemo.get().clear();
43+
threadMemo.remove();
4644
evaluator.reset();
4745
super.reset();
4846
}

src/test/java/org/jsoup/helper/RegexTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ void tearDown() {
2828
@ValueSource(booleans = {false, true})
2929
void testRegexDelegates(boolean useRe2j) {
3030
Regex.wantsRe2j(useRe2j);
31+
assertEquals(Regex.usingRe2j(), useRe2j);
3132
String pattern = "(\\d+)";
3233
String input = "12345";
3334

0 commit comments

Comments
 (0)