Skip to content

Commit 3704342

Browse files
committed
Merge branch 'printer' into develop-1.20.1
Reimplemented the pretty-printer algo
1 parent 9cb943a commit 3704342

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+1579
-530
lines changed

src/main/java/org/jsoup/nodes/Attribute.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,7 @@ protected final boolean shouldCollapseAttribute(Document.OutputSettings out) {
263263

264264
// collapse unknown foo=null, known checked=null, checked="", checked=checked; write out others
265265
protected static boolean shouldCollapseAttribute(final String key, @Nullable final String val, final Document.OutputSettings out) {
266-
return (
267-
out.syntax() == Syntax.html &&
266+
return (out.syntax() == Syntax.html &&
268267
(val == null || (val.isEmpty() || val.equalsIgnoreCase(key)) && Attribute.isBooleanAttribute(key)));
269268
}
270269

src/main/java/org/jsoup/nodes/CDataNode.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,11 @@ public String text() {
2525
}
2626

2727
@Override
28-
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
28+
void outerHtmlHead(Appendable accum, Document.OutputSettings out) throws IOException {
2929
accum
3030
.append("<![CDATA[")
31-
.append(getWholeText());
32-
}
33-
34-
@Override
35-
void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
36-
accum.append("]]>");
31+
.append(getWholeText())
32+
.append("]]>");
3733
}
3834

3935
@Override

src/main/java/org/jsoup/nodes/Comment.java

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,13 @@ public Comment setData(String data) {
3737
}
3838

3939
@Override
40-
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
41-
if (out.prettyPrint() && ((isEffectivelyFirst() && parentNode instanceof Element && ((Element) parentNode).tag().formatAsBlock()) || (out.outline() )))
42-
indent(accum, depth, out);
40+
void outerHtmlHead(Appendable accum, Document.OutputSettings out) throws IOException {
4341
accum
44-
.append("<!--")
45-
.append(getData())
46-
.append("-->");
42+
.append("<!--")
43+
.append(getData())
44+
.append("-->");
4745
}
4846

49-
@Override
50-
void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) {}
51-
5247
@Override
5348
public Comment clone() {
5449
return (Comment) super.clone();

src/main/java/org/jsoup/nodes/DataNode.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public DataNode setWholeData(String data) {
3939
}
4040

4141
@Override
42-
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
42+
void outerHtmlHead(Appendable accum, Document.OutputSettings out) throws IOException {
4343
/* For XML output, escape the DataNode in a CData section. The data may contain pseudo-CData content if it was
4444
parsed as HTML, so don't double up Cdata. Output in polyglot HTML / XHTML / XML format. */
4545
final String data = getWholeData();
@@ -52,13 +52,10 @@ else if (parentNameIs("style"))
5252
accum.append("<![CDATA[").append(data).append("]]>");
5353
} else {
5454
// In HTML, data is not escaped in the output of data nodes, so < and & in script, style is OK
55-
accum.append(getWholeData());
55+
accum.append(data);
5656
}
5757
}
5858

59-
@Override
60-
void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) {}
61-
6259
@Override
6360
public DataNode clone() {
6461
return (DataNode) super.clone();

src/main/java/org/jsoup/nodes/Document.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public class Document extends Element {
3737
@see #createShell
3838
*/
3939
public Document(String namespace, String baseUri) {
40-
super(Tag.valueOf("#root", namespace, ParseSettings.htmlDefault), baseUri);
40+
super(new Tag("#root", namespace), baseUri);
4141
this.location = baseUri;
4242
this.parser = Parser.htmlParser(); // default, but overridable
4343
}
@@ -217,7 +217,7 @@ public void title(String title) {
217217
@return new element
218218
*/
219219
public Element createElement(String tagName) {
220-
return new Element(Tag.valueOf(tagName, parser.defaultNamespace(), ParseSettings.preserveCase), this.baseUri());
220+
return new Element(parser.tagSet().valueOf(tagName, parser.defaultNamespace(), ParseSettings.preserveCase), this.baseUri());
221221
}
222222

223223
@Override

src/main/java/org/jsoup/nodes/DocumentType.java

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,7 @@ public String nodeName() {
7878
}
7979

8080
@Override
81-
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
82-
// add a newline if the doctype has a preceding node (which must be a comment)
83-
if (siblingIndex > 0 && out.prettyPrint())
84-
accum.append('\n');
85-
81+
void outerHtmlHead(Appendable accum, Document.OutputSettings out) throws IOException {
8682
if (out.syntax() == Syntax.html && !has(PublicId) && !has(SystemId)) {
8783
// looks like a html5 doctype, go lowercase for aesthetics
8884
accum.append("<!doctype");
@@ -100,9 +96,6 @@ void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) thr
10096
accum.append('>');
10197
}
10298

103-
@Override
104-
void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) {
105-
}
10699

107100
private boolean has(final String attribute) {
108101
return !StringUtil.isBlank(attr(attribute));

src/main/java/org/jsoup/nodes/Element.java

Lines changed: 35 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public class Element extends Node implements Iterable<Element> {
5252
private static final List<Element> EmptyChildren = Collections.emptyList();
5353
private static final Pattern ClassSplit = Pattern.compile("\\s+");
5454
private static final String BaseUriKey = Attributes.internalKey("baseUri");
55-
private Tag tag;
55+
Tag tag;
5656
private @Nullable WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children
5757
List<Node> childNodes;
5858
@Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null
@@ -72,7 +72,7 @@ public Element(String tag, String namespace) {
7272
* @see #Element(String tag, String namespace)
7373
*/
7474
public Element(String tag) {
75-
this(Tag.valueOf(tag, Parser.NamespaceHtml, ParseSettings.preserveCase), "", null);
75+
this(tag, Parser.NamespaceHtml);
7676
}
7777

7878
/**
@@ -216,7 +216,8 @@ public Element tagName(String tagName) {
216216
public Element tagName(String tagName, String namespace) {
217217
Validate.notEmptyParam(tagName, "tagName");
218218
Validate.notEmptyParam(namespace, "namespace");
219-
tag = Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()); // maintains the case option of the original parse
219+
Parser parser = NodeUtils.parser(this);
220+
tag = parser.tagSet().valueOf(tagName, namespace, parser.settings()); // maintains the case option of the original parse
220221
return this;
221222
}
222223

@@ -229,6 +230,18 @@ public Tag tag() {
229230
return tag;
230231
}
231232

233+
/**
234+
Change the Tag of this element.
235+
@param tag the new tag
236+
@return this element, for chaining
237+
@since 1.20.1
238+
*/
239+
public Element tag(Tag tag) {
240+
Validate.notNull(tag);
241+
this.tag = tag;
242+
return this;
243+
}
244+
232245
/**
233246
* Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
234247
* {@code <span> == false}).
@@ -784,7 +797,8 @@ public Element appendElement(String tagName) {
784797
* @return the new element, in the specified namespace
785798
*/
786799
public Element appendElement(String tagName, String namespace) {
787-
Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri());
800+
Parser parser = NodeUtils.parser(this);
801+
Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri());
788802
appendChild(child);
789803
return child;
790804
}
@@ -808,7 +822,8 @@ public Element prependElement(String tagName) {
808822
* @return the new element, in the specified namespace
809823
*/
810824
public Element prependElement(String tagName, String namespace) {
811-
Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri());
825+
Parser parser = NodeUtils.parser(this);
826+
Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri());
812827
prependChild(child);
813828
return child;
814829
}
@@ -1463,7 +1478,7 @@ public TextAccumulator(StringBuilder accum) {
14631478
if (node instanceof Element) {
14641479
Element element = (Element) node;
14651480
Node next = node.nextSibling();
1466-
if (element.isBlock() && (next instanceof TextNode || next instanceof Element && !((Element) next).tag.formatAsBlock()) && !lastCharIsWhitespace(accum))
1481+
if (!element.tag.isInline() && (next instanceof TextNode || next instanceof Element && ((Element) next).tag.isInline()) && !lastCharIsWhitespace(accum))
14671482
accum.append(' ');
14681483
}
14691484

@@ -1564,10 +1579,8 @@ static boolean preserveWhitespace(@Nullable Node node) {
15641579
public Element text(String text) {
15651580
Validate.notNull(text);
15661581
empty();
1567-
// special case for script/style in HTML: should be data node
1568-
Document owner = ownerDocument();
1569-
// an alternate impl would be to run through the parser
1570-
if (owner != null && owner.parser().isContentForTagData(normalName()))
1582+
// special case for script/style in HTML (or custom tags flagged as data): content should be a data node
1583+
if (tag().is(Tag.Data))
15711584
appendChild(new DataNode(text));
15721585
else
15731586
appendChild(new TextNode(text));
@@ -1797,20 +1810,8 @@ public Range endSourceRange() {
17971810
return Range.of(this, false);
17981811
}
17991812

1800-
boolean shouldIndent(final Document.OutputSettings out) {
1801-
return out.prettyPrint() && isFormatAsBlock(out) && !isInlineable(out) && !preserveWhitespace(parentNode);
1802-
}
1803-
18041813
@Override
1805-
void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException {
1806-
if (shouldIndent(out)) {
1807-
if (accum instanceof StringBuilder) {
1808-
if (((StringBuilder) accum).length() > 0)
1809-
indent(accum, depth, out);
1810-
} else {
1811-
indent(accum, depth, out);
1812-
}
1813-
}
1814+
void outerHtmlHead(final Appendable accum, Document.OutputSettings out) throws IOException {
18141815
accum.append('<').append(safeTagName(out.syntax()));
18151816
if (attributes != null) attributes.html(accum, out);
18161817

@@ -1826,13 +1827,8 @@ void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSetti
18261827
}
18271828

18281829
@Override
1829-
void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
1830+
void outerHtmlTail(Appendable accum, Document.OutputSettings out) throws IOException {
18301831
if (!(childNodes.isEmpty() && tag.isSelfClosing())) {
1831-
if (out.prettyPrint() && (!childNodes.isEmpty() && (
1832-
(tag.formatAsBlock() && !preserveWhitespace(parentNode)) ||
1833-
(out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && (childNodes.get(0) instanceof Element))))
1834-
)))
1835-
indent(accum, depth, out);
18361832
accum.append("</").append(safeTagName(out.syntax())).append('>');
18371833
}
18381834
}
@@ -1857,12 +1853,16 @@ public String html() {
18571853
}
18581854

18591855
@Override
1860-
public <T extends Appendable> T html(T appendable) {
1861-
final int size = childNodes.size();
1862-
for (int i = 0; i < size; i++)
1863-
childNodes.get(i).outerHtml(appendable);
1864-
1865-
return appendable;
1856+
public <T extends Appendable> T html(T accum) {
1857+
Node child = firstChild();
1858+
if (child != null) {
1859+
Printer printer = Printer.printerFor(child, accum);
1860+
while (child != null) {
1861+
NodeTraversor.traverse(printer, child);
1862+
child = child.nextSibling();
1863+
}
1864+
}
1865+
return accum;
18661866
}
18671867

18681868
/**
@@ -1969,17 +1969,4 @@ private static final class NodeList extends ChangeNotifyingArrayList<Node> {
19691969
owner.nodelistChanged();
19701970
}
19711971
}
1972-
1973-
private boolean isFormatAsBlock(Document.OutputSettings out) {
1974-
return tag.isBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline();
1975-
}
1976-
1977-
private boolean isInlineable(Document.OutputSettings out) {
1978-
if (!tag.isInline())
1979-
return false;
1980-
return (parent() == null || parent().isBlock())
1981-
&& !isEffectivelyFirst()
1982-
&& !out.outline()
1983-
&& !nameIs("br");
1984-
}
19851972
}

src/main/java/org/jsoup/nodes/LeafNode.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import org.jsoup.helper.Validate;
44

5+
import java.io.IOException;
56
import java.util.List;
67

78
/**
@@ -108,6 +109,9 @@ protected List<Node> ensureChildNodes() {
108109
return EmptyNodes;
109110
}
110111

112+
@Override
113+
void outerHtmlTail(Appendable accum, Document.OutputSettings out) throws IOException {}
114+
111115
@Override
112116
protected LeafNode doClone(Node parent) {
113117
LeafNode clone = (LeafNode) super.doClone(parent);

src/main/java/org/jsoup/nodes/Node.java

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.jsoup.nodes;
22

3-
import org.jsoup.SerializationException;
43
import org.jsoup.helper.Validate;
54
import org.jsoup.internal.StringUtil;
65
import org.jsoup.select.NodeFilter;
@@ -767,17 +766,20 @@ public String outerHtml() {
767766
}
768767

769768
protected void outerHtml(Appendable accum) {
770-
NodeTraversor.traverse(new OuterHtmlVisitor(accum, NodeUtils.outputSettings(this)), this);
769+
Printer printer = Printer.printerFor(this, accum);
770+
NodeTraversor.traverse(printer, this);
771771
}
772772

773773
/**
774774
Get the outer HTML of this node.
775+
775776
@param accum accumulator to place HTML into
777+
@param out the document output settings to use when writing this node
776778
@throws IOException if appending to the given accumulator fails.
777779
*/
778-
abstract void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException;
780+
abstract void outerHtmlHead(final Appendable accum, final Document.OutputSettings out) throws IOException;
779781

780-
abstract void outerHtmlTail(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException;
782+
abstract void outerHtmlTail(final Appendable accum, final Document.OutputSettings out) throws IOException;
781783

782784
/**
783785
* Write this node and its children to the given {@link Appendable}.
@@ -935,32 +937,4 @@ protected Node doClone(@Nullable Node parent) {
935937

936938
return clone;
937939
}
938-
939-
private static class OuterHtmlVisitor implements NodeVisitor {
940-
private final Appendable accum;
941-
private final Document.OutputSettings out;
942-
943-
OuterHtmlVisitor(Appendable accum, Document.OutputSettings out) {
944-
this.accum = accum;
945-
this.out = out;
946-
}
947-
948-
@Override public void head(Node node, int depth) {
949-
try {
950-
node.outerHtmlHead(accum, depth, out);
951-
} catch (IOException exception) {
952-
throw new SerializationException(exception);
953-
}
954-
}
955-
956-
@Override public void tail(Node node, int depth) {
957-
if (!node.nodeName().equals("#text")) { // saves a void hit.
958-
try {
959-
node.outerHtmlTail(accum, depth, out);
960-
} catch (IOException exception) {
961-
throw new SerializationException(exception);
962-
}
963-
}
964-
}
965-
}
966940
}

0 commit comments

Comments
 (0)