Skip to content

Commit ea87ce4

Browse files
committed
try to make the code more robust (and use less memory)
1 parent abeae64 commit ea87ce4

File tree

4 files changed

+317
-15
lines changed

4 files changed

+317
-15
lines changed

src/main/java/org/htmlunit/html/DomElement.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -656,9 +656,7 @@ protected List<E> provideElements() {
656656
*/
657657
public <E extends HtmlElement> List<E> getStaticElementsByTagName(final String tagName) {
658658
final List<E> res = new ArrayList<>();
659-
for (final Iterator<HtmlElement> iterator
660-
= this.new DescendantElementsIterator<>(HtmlElement.class);
661-
iterator.hasNext();) {
659+
for (final Iterator<HtmlElement> iterator = this.new DescendantHtmlElementsIterator(); iterator.hasNext();) {
662660
final HtmlElement elem = iterator.next();
663661
if (elem.getLocalName().equalsIgnoreCase(tagName)) {
664662
res.add((E) elem);

src/main/java/org/htmlunit/html/DomNode.java

Lines changed: 311 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,9 +1047,7 @@ private void fireAddition(final DomNode domNode) {
10471047

10481048
// a node that is already "complete" (ie not being parsed) and not yet attached
10491049
if (!domNode.isBodyParsed() && !wasAlreadyAttached) {
1050-
for (final Iterator<DomNode> iterator
1051-
= domNode.new DescendantElementsIterator<>(DomNode.class);
1052-
iterator.hasNext();) {
1050+
for (final Iterator<DomNode> iterator = domNode.new DescendantDomNodesIterator(); iterator.hasNext();) {
10531051
final DomNode child = iterator.next();
10541052
child.attachedToPage_ = true;
10551053
child.onAllChildrenAddedToPage(true);
@@ -1359,7 +1357,7 @@ public void remove() {
13591357
* @return an {@link Iterable} that will recursively iterate over all of this node's descendants
13601358
*/
13611359
public final Iterable<DomNode> getDescendants() {
1362-
return () -> new DescendantElementsIterator<>(DomNode.class);
1360+
return () -> new DescendantDomNodesIterator();
13631361
}
13641362

13651363
/**
@@ -1371,7 +1369,7 @@ public final Iterable<DomNode> getDescendants() {
13711369
* @see #getDomElementDescendants()
13721370
*/
13731371
public final Iterable<HtmlElement> getHtmlElementDescendants() {
1374-
return () -> new DescendantElementsIterator<>(HtmlElement.class);
1372+
return () -> new DescendantHtmlElementsIterator();
13751373
}
13761374

13771375
/**
@@ -1383,13 +1381,17 @@ public final Iterable<HtmlElement> getHtmlElementDescendants() {
13831381
* @see #getHtmlElementDescendants()
13841382
*/
13851383
public final Iterable<DomElement> getDomElementDescendants() {
1386-
return () -> new DescendantElementsIterator<>(DomElement.class);
1384+
return () -> new DescendantDomElementsIterator();
13871385
}
13881386

13891387
/**
13901388
* Iterates over all descendants of a specific type, in document order.
13911389
* @param <T> the type of nodes over which to iterate
1390+
*
1391+
* @deprecated as of version 4.7.0; use {@link DescendantDomNodesIterator},
1392+
* {@link DescendantDomElementsIterator}, or {@link DescendantHtmlElementsIterator} instead.
13921393
*/
1394+
@Deprecated
13931395
protected class DescendantElementsIterator<T extends DomNode> implements Iterator<T> {
13941396

13951397
private DomNode currentNode_;
@@ -1494,6 +1496,309 @@ private DomNode getNextDomSibling(final DomNode element) {
14941496
}
14951497
}
14961498

1499+
/**
1500+
* Iterates over all descendants DomNodes, in document order.
1501+
*/
1502+
protected final class DescendantDomNodesIterator implements Iterator<DomNode> {
1503+
private DomNode currentNode_;
1504+
private DomNode nextNode_;
1505+
1506+
/**
1507+
* Creates a new instance which iterates over the specified node type.
1508+
*/
1509+
public DescendantDomNodesIterator() {
1510+
nextNode_ = getFirstChildElement(DomNode.this);
1511+
}
1512+
1513+
/** {@inheritDoc} */
1514+
@Override
1515+
public boolean hasNext() {
1516+
return nextNode_ != null;
1517+
}
1518+
1519+
/** {@inheritDoc} */
1520+
@Override
1521+
public DomNode next() {
1522+
return nextNode();
1523+
}
1524+
1525+
/** {@inheritDoc} */
1526+
@Override
1527+
public void remove() {
1528+
if (currentNode_ == null) {
1529+
throw new IllegalStateException("Unable to remove current node, because there is no current node.");
1530+
}
1531+
final DomNode current = currentNode_;
1532+
while (nextNode_ != null && current.isAncestorOf(nextNode_)) {
1533+
next();
1534+
}
1535+
current.remove();
1536+
}
1537+
1538+
/** @return the next node, if there is one */
1539+
@SuppressWarnings("unchecked")
1540+
public DomNode nextNode() {
1541+
currentNode_ = nextNode_;
1542+
1543+
DomNode next = getFirstChildElement(nextNode_);
1544+
if (next == null) {
1545+
next = getNextDomSibling(nextNode_);
1546+
}
1547+
if (next == null) {
1548+
next = getNextElementUpwards(nextNode_);
1549+
}
1550+
nextNode_ = next;
1551+
1552+
return currentNode_;
1553+
}
1554+
1555+
private DomNode getNextElementUpwards(final DomNode startingNode) {
1556+
if (startingNode == DomNode.this) {
1557+
return null;
1558+
}
1559+
1560+
DomNode parent = startingNode.getParentNode();
1561+
while (parent != null && parent != DomNode.this) {
1562+
DomNode next = parent.getNextSibling();
1563+
while (next != null && !isAccepted(next)) {
1564+
next = next.getNextSibling();
1565+
}
1566+
if (next != null) {
1567+
return next;
1568+
}
1569+
parent = parent.getParentNode();
1570+
}
1571+
return null;
1572+
}
1573+
1574+
private DomNode getFirstChildElement(final DomNode parent) {
1575+
DomNode node = parent.getFirstChild();
1576+
while (node != null && !isAccepted(node)) {
1577+
node = node.getNextSibling();
1578+
}
1579+
return node;
1580+
}
1581+
1582+
/**
1583+
* Indicates if the node is accepted. If not it won't be explored at all.
1584+
* @param node the node to test
1585+
* @return {@code true} if accepted
1586+
*/
1587+
private boolean isAccepted(final DomNode node) {
1588+
return DomNode.class.isAssignableFrom(node.getClass());
1589+
}
1590+
1591+
private DomNode getNextDomSibling(final DomNode element) {
1592+
DomNode node = element.getNextSibling();
1593+
while (node != null && !isAccepted(node)) {
1594+
node = node.getNextSibling();
1595+
}
1596+
return node;
1597+
}
1598+
}
1599+
1600+
/**
1601+
* Iterates over all descendants DomTypes, in document order.
1602+
*/
1603+
protected final class DescendantDomElementsIterator implements Iterator<DomElement> {
1604+
private DomNode currentNode_;
1605+
private DomNode nextNode_;
1606+
1607+
/**
1608+
* Creates a new instance which iterates over the specified node type.
1609+
*/
1610+
public DescendantDomElementsIterator() {
1611+
nextNode_ = getFirstChildElement(DomNode.this);
1612+
}
1613+
1614+
/** {@inheritDoc} */
1615+
@Override
1616+
public boolean hasNext() {
1617+
return nextNode_ != null;
1618+
}
1619+
1620+
/** {@inheritDoc} */
1621+
@Override
1622+
public DomElement next() {
1623+
return nextNode();
1624+
}
1625+
1626+
/** {@inheritDoc} */
1627+
@Override
1628+
public void remove() {
1629+
if (currentNode_ == null) {
1630+
throw new IllegalStateException("Unable to remove current node, because there is no current node.");
1631+
}
1632+
final DomNode current = currentNode_;
1633+
while (nextNode_ != null && current.isAncestorOf(nextNode_)) {
1634+
next();
1635+
}
1636+
current.remove();
1637+
}
1638+
1639+
/** @return the next node, if there is one */
1640+
@SuppressWarnings("unchecked")
1641+
public DomElement nextNode() {
1642+
currentNode_ = nextNode_;
1643+
1644+
DomNode next = getFirstChildElement(nextNode_);
1645+
if (next == null) {
1646+
next = getNextDomSibling(nextNode_);
1647+
}
1648+
if (next == null) {
1649+
next = getNextElementUpwards(nextNode_);
1650+
}
1651+
nextNode_ = next;
1652+
1653+
return (DomElement) currentNode_;
1654+
}
1655+
1656+
private DomNode getNextElementUpwards(final DomNode startingNode) {
1657+
if (startingNode == DomNode.this) {
1658+
return null;
1659+
}
1660+
1661+
DomNode parent = startingNode.getParentNode();
1662+
while (parent != null && parent != DomNode.this) {
1663+
DomNode next = parent.getNextSibling();
1664+
while (next != null && !isAccepted(next)) {
1665+
next = next.getNextSibling();
1666+
}
1667+
if (next != null) {
1668+
return next;
1669+
}
1670+
parent = parent.getParentNode();
1671+
}
1672+
return null;
1673+
}
1674+
1675+
private DomNode getFirstChildElement(final DomNode parent) {
1676+
DomNode node = parent.getFirstChild();
1677+
while (node != null && !isAccepted(node)) {
1678+
node = node.getNextSibling();
1679+
}
1680+
return node;
1681+
}
1682+
1683+
/**
1684+
* Indicates if the node is accepted. If not it won't be explored at all.
1685+
* @param node the node to test
1686+
* @return {@code true} if accepted
1687+
*/
1688+
private boolean isAccepted(final DomNode node) {
1689+
return DomElement.class.isAssignableFrom(node.getClass());
1690+
}
1691+
1692+
private DomNode getNextDomSibling(final DomNode element) {
1693+
DomNode node = element.getNextSibling();
1694+
while (node != null && !isAccepted(node)) {
1695+
node = node.getNextSibling();
1696+
}
1697+
return node;
1698+
}
1699+
}
1700+
1701+
/**
1702+
* Iterates over all descendants HtmlElements, in document order.
1703+
*/
1704+
protected final class DescendantHtmlElementsIterator implements Iterator<HtmlElement> {
1705+
private DomNode currentNode_;
1706+
private DomNode nextNode_;
1707+
1708+
/**
1709+
* Creates a new instance which iterates over the specified node type.
1710+
*/
1711+
public DescendantHtmlElementsIterator() {
1712+
nextNode_ = getFirstChildElement(DomNode.this);
1713+
}
1714+
1715+
/** {@inheritDoc} */
1716+
@Override
1717+
public boolean hasNext() {
1718+
return nextNode_ != null;
1719+
}
1720+
1721+
/** {@inheritDoc} */
1722+
@Override
1723+
public HtmlElement next() {
1724+
return nextNode();
1725+
}
1726+
1727+
/** {@inheritDoc} */
1728+
@Override
1729+
public void remove() {
1730+
if (currentNode_ == null) {
1731+
throw new IllegalStateException("Unable to remove current node, because there is no current node.");
1732+
}
1733+
final DomNode current = currentNode_;
1734+
while (nextNode_ != null && current.isAncestorOf(nextNode_)) {
1735+
next();
1736+
}
1737+
current.remove();
1738+
}
1739+
1740+
/** @return the next node, if there is one */
1741+
@SuppressWarnings("unchecked")
1742+
public HtmlElement nextNode() {
1743+
currentNode_ = nextNode_;
1744+
1745+
DomNode next = getFirstChildElement(nextNode_);
1746+
if (next == null) {
1747+
next = getNextDomSibling(nextNode_);
1748+
}
1749+
if (next == null) {
1750+
next = getNextElementUpwards(nextNode_);
1751+
}
1752+
nextNode_ = next;
1753+
1754+
return (HtmlElement) currentNode_;
1755+
}
1756+
1757+
private DomNode getNextElementUpwards(final DomNode startingNode) {
1758+
if (startingNode == DomNode.this) {
1759+
return null;
1760+
}
1761+
1762+
DomNode parent = startingNode.getParentNode();
1763+
while (parent != null && parent != DomNode.this) {
1764+
DomNode next = parent.getNextSibling();
1765+
while (next != null && !isAccepted(next)) {
1766+
next = next.getNextSibling();
1767+
}
1768+
if (next != null) {
1769+
return next;
1770+
}
1771+
parent = parent.getParentNode();
1772+
}
1773+
return null;
1774+
}
1775+
1776+
private DomNode getFirstChildElement(final DomNode parent) {
1777+
DomNode node = parent.getFirstChild();
1778+
while (node != null && !isAccepted(node)) {
1779+
node = node.getNextSibling();
1780+
}
1781+
return node;
1782+
}
1783+
1784+
/**
1785+
* Indicates if the node is accepted. If not it won't be explored at all.
1786+
* @param node the node to test
1787+
* @return {@code true} if accepted
1788+
*/
1789+
private boolean isAccepted(final DomNode node) {
1790+
return HtmlElement.class.isAssignableFrom(node.getClass());
1791+
}
1792+
1793+
private DomNode getNextDomSibling(final DomNode element) {
1794+
DomNode node = element.getNextSibling();
1795+
while (node != null && !isAccepted(node)) {
1796+
node = node.getNextSibling();
1797+
}
1798+
return node;
1799+
}
1800+
}
1801+
14971802
/**
14981803
* Returns this node's ready state (IE only).
14991804
* @return this node's ready state

src/main/java/org/htmlunit/html/HtmlPage.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,9 +1745,8 @@ void notifyNodeAdded(final DomNode node) {
17451745
frameElements_.add((BaseFrameElement) node);
17461746
}
17471747

1748-
for (final Iterator<HtmlElement> iterator
1749-
= node.new DescendantElementsIterator<>(HtmlElement.class);
1750-
iterator.hasNext();) {
1748+
for (final Iterator<HtmlElement> iterator = node.new DescendantHtmlElementsIterator();
1749+
iterator.hasNext();) {
17511750
final HtmlElement child = iterator.next();
17521751
if (child instanceof BaseFrameElement) {
17531752
frameElements_.add((BaseFrameElement) child);

0 commit comments

Comments
 (0)