Skip to content

Commit e0336c0

Browse files
committed
Optimized out id() and ownerDoc() calls
1 parent 26351ef commit e0336c0

File tree

3 files changed

+54
-36
lines changed

3 files changed

+54
-36
lines changed

CHANGES.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# jsoup Changelog
22

3+
## 1.19.2 (PENDING)
4+
5+
### Changes
6+
7+
### Improvements
8+
* `Element.cssSelector()` will prefer to return shorter selectors by using ancestor IDs when available and unique. E.g. `#id > div > p` instead of `html > body > div > div > p` [#2283](https://github.com/jhy/jsoup/pull/2283).
9+
310
## 1.19.1 (2025-03-04)
411

512
### Changes

src/main/java/org/jsoup/nodes/Element.java

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -937,47 +937,47 @@ public Element wrap(String html) {
937937
return (Element) super.wrap(html);
938938
}
939939

940-
private String idAsCssSelector() {
941-
String result = "";
942-
943-
if (!id().isEmpty()) {
944-
// prefer to return the ID - but check that it's actually unique first!
945-
String idSel = "#" + escapeCssIdentifier(id());
946-
Document doc = ownerDocument();
947-
if (doc != null) {
948-
Elements els = doc.select(idSel);
949-
if (els.size() == 1 && els.get(0) == this) // otherwise, continue to the nth-child impl
950-
result = idSel;
940+
/**
941+
Gets an #id selector for this element, if it has a unique ID. Otherwise, returns an empty string.
942+
943+
@param ownerDoc the document that owns this element, if there is one
944+
*/
945+
private String uniqueIdSelector(@Nullable Document ownerDoc) {
946+
String id = id();
947+
if (!id.isEmpty()) { // check if the ID is unique and matches this
948+
String idSel = "#" + escapeCssIdentifier(id);
949+
if (ownerDoc != null) {
950+
Elements els = ownerDoc.select(idSel);
951+
if (els.size() == 1 && els.get(0) == this) return idSel;
951952
} else {
952-
result = idSel; // no ownerdoc, return the ID selector
953+
return idSel;
953954
}
954955
}
955-
956-
return result;
956+
return EmptyString;
957957
}
958958

959959
/**
960-
* Get a CSS selector that will uniquely select this element.
961-
* <p>
962-
* If the element has an ID, returns #id;
963-
* otherwise returns the parent (if any) CSS selector, followed by {@literal '>'},
964-
* followed by a unique selector for the element (tag.class.class:nth-child(n)).
965-
* </p>
966-
*
967-
* @return the CSS Path that can be used to retrieve the element in a selector.
960+
Get a CSS selector that will uniquely select this element.
961+
<p>
962+
If the element has an ID, returns #id; otherwise returns the parent (if any) CSS selector, followed by
963+
{@literal '>'}, followed by a unique selector for the element (tag.class.class:nth-child(n)).
964+
</p>
965+
966+
@return the CSS Path that can be used to retrieve the element in a selector.
968967
*/
969968
public String cssSelector() {
970-
String idAsCssSelector = idAsCssSelector();
971-
if (!idAsCssSelector.isEmpty())
972-
return idAsCssSelector;
969+
Document ownerDoc = ownerDocument();
970+
String idSel = uniqueIdSelector(ownerDoc);
971+
if (!idSel.isEmpty()) return idSel;
973972

973+
// No unique ID, work up the parent stack and find either a unique ID to hang from, or just a GP > Parent > Child chain
974974
StringBuilder selector = StringUtil.borrowBuilder();
975975
Element el = this;
976976
while (el != null && !(el instanceof Document)) {
977-
idAsCssSelector = el.idAsCssSelector();
978-
if (!idAsCssSelector.isEmpty()) {
979-
selector.insert(0, idAsCssSelector);
980-
break;
977+
idSel = el.uniqueIdSelector(ownerDoc);
978+
if (!idSel.isEmpty()) {
979+
selector.insert(0, idSel);
980+
break; // found a unique ID to use as ancestor; stop
981981
}
982982
selector.insert(0, el.cssSelectorComponent());
983983
el = el.parent();

src/test/java/org/jsoup/nodes/ElementTest.java

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2594,13 +2594,24 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
25942594

25952595
@Test void cssSelectorParentWithId() {
25962596
// https://github.com/jhy/jsoup/issues/2282
2597-
Document doc = Jsoup.parse("<div><div id=\"id1\"><p>A</p></div><div><p>B</p></div><div class=\"c1 c2\"><p>C</p></div></div>");
2598-
Element divA = doc.select("div div p").get(0);
2599-
Element divB = doc.select("div div p").get(1);
2600-
Element divC = doc.select("div div p").get(2);
2601-
assertEquals("#id1 > p", divA.cssSelector());
2602-
assertEquals("html > body > div > div:nth-child(2) > p", divB.cssSelector());
2603-
assertEquals("html > body > div > div.c1.c2 > p", divC.cssSelector());
2597+
Document doc = Jsoup.parse("<div><div id=id1><p>A</p></div><div><p>B</p></div><div class='c1 c2'><p>C</p></div></div>");
2598+
Elements els = doc.select("p");
2599+
Element pA = els.get(0);
2600+
Element pB = els.get(1);
2601+
Element pC = els.get(2);
2602+
assertEquals("#id1 > p", pA.cssSelector());
2603+
assertEquals("html > body > div > div:nth-child(2) > p", pB.cssSelector());
2604+
assertEquals("html > body > div > div.c1.c2 > p", pC.cssSelector());
2605+
}
2606+
2607+
@Test void cssSelectorWithNonUniqueId() {
2608+
Document doc = Jsoup.parse("<main id=out><div><div id=in>One</div><div id=in>Two</div></div></main>");
2609+
Element two = doc.expectFirst("div:containsOwn(Two)");
2610+
String selector = two.cssSelector();
2611+
assertEquals("#out > div > div:nth-child(2)", selector);
2612+
Elements found = doc.select(selector);
2613+
assertEquals(1, found.size());
2614+
assertEquals(two, found.first());
26042615
}
26052616

26062617
@Test void cssSelectorDoesntStackOverflow() {

0 commit comments

Comments
 (0)